author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>   2016-02-10 17:20:51 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>   2016-02-10 17:20:51 +0000
commit     2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree       259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config/rs6000/rs6000.c
parent     c8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
download   gcc-2d9d01985a7a7866916fafa19c5c296702e69714.tar.gz
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9
svn merge -r227001:227400 ^/trunk ;
there is some gengtype issue before svn r228000... }}
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
-rw-r--r--  gcc/config/rs6000/rs6000.c  343
1 file changed, 313 insertions(+), 30 deletions(-)
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2a969782f26..8107bec8e6e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p)
   else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
     error ("-mfloat128-software requires VSX support");
 
+  /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
+     support.  If we only have ISA 2.06 support, and the user did not specify
+     the switch, leave it set to -1 so the movmisalign patterns are enabled,
+     but we don't enable the full vectorization support  */
+  if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
+    TARGET_ALLOW_MOVMISALIGN = 1;
+
+  else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
+    {
+      if (TARGET_ALLOW_MOVMISALIGN > 0)
+        error ("-mallow-movmisalign requires -mvsx");
+
+      TARGET_ALLOW_MOVMISALIGN = 0;
+    }
+
+  /* Determine when unaligned vector accesses are permitted, and when
+     they are preferred over masked Altivec loads.  Note that if
+     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
+     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
+     not true.  */
+  if (TARGET_EFFICIENT_UNALIGNED_VSX)
+    {
+      if (!TARGET_VSX)
+        {
+          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+            error ("-mefficient-unaligned-vsx requires -mvsx");
+
+          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+        }
+
+      else if (!TARGET_ALLOW_MOVMISALIGN)
+        {
+          if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+            error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
+
+          rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+        }
+    }
+
   if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
     rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
 
@@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p)
         }
     }
 
-  /* Determine when unaligned vector accesses are permitted, and when
-     they are preferred over masked Altivec loads.  Note that if
-     TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
-     TARGET_EFFICIENT_UNALIGNED_VSX must be as well.  The converse is
-     not true.  */
-  if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
-    if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
-        && TARGET_ALLOW_MOVMISALIGN != 0)
-      TARGET_EFFICIENT_UNALIGNED_VSX = 1;
-    else
-      TARGET_EFFICIENT_UNALIGNED_VSX = 0;
-  }
-
-  if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
-    TARGET_ALLOW_MOVMISALIGN = 1;
-
   /* Set the builtin mask of the various options used that could affect which
      builtins were used.  In the past we used target_flags, but we've run out
      of bits, and some options like SPE and PAIRED are no longer in
@@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
      during expand.  */
   gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
 
-  /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
+  /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
      V1TImode).  */
   if (mode == TImode || mode == V1TImode)
     {
@@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
         {
           unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
           unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+          bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
+          bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
 
           /* Don't allow 64-bit types to overlap with 128-bit types that take a
              single register under VSX because the scalar part of the register
@@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from,
             IEEE floating point can't overlap, and neither can small
             values.  */
 
-          if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+          if (to_float128_vector_p && from_float128_vector_p)
+            return false;
+
+          else if (to_float128_vector_p || from_float128_vector_p)
             return true;
 
           /* TDmode in floating-mode registers must always go into a register
@@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
   if (TARGET_E500_DOUBLE
       && ((((to) == DFmode) + ((from) == DFmode)) == 1
           || (((to) == TFmode) + ((from) == TFmode)) == 1
+          || (((to) == IFmode) + ((from) == IFmode)) == 1
+          || (((to) == KFmode) + ((from) == KFmode)) == 1
           || (((to) == DDmode) + ((from) == DDmode)) == 1
           || (((to) == TDmode) + ((from) == TDmode)) == 1
           || (((to) == DImode) + ((from) == DImode)) == 1))
@@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[])
       return output_vec_const_move (operands);
     }
 
-  if (TARGET_DEBUG_ADDR)
-    {
-      fprintf (stderr, "\n===== Bad 128 bit move:\n");
-      debug_rtx (gen_rtx_SET (dest, src));
-    }
-
-  gcc_unreachable ();
+  fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
 }
 
 /* Validate a 128-bit move.  */
@@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
           break;
 
         case TFmode:
+        case IFmode:
+        case KFmode:
           cmp = (flag_finite_math_only && !flag_trapping_math)
             ? gen_tsttfeq_gpr (compare_result, op0, op1)
             : gen_cmptfeq_gpr (compare_result, op0, op1);
@@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
           break;
 
         case TFmode:
+        case IFmode:
+        case KFmode:
          cmp = (flag_finite_math_only && !flag_trapping_math)
            ? gen_tsttfgt_gpr (compare_result, op0, op1)
            : gen_cmptfgt_gpr (compare_result, op0, op1);
@@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
           break;
 
         case TFmode:
+        case IFmode:
+        case KFmode:
          cmp = (flag_finite_math_only && !flag_trapping_math)
            ? gen_tsttflt_gpr (compare_result, op0, op1)
            : gen_cmptflt_gpr (compare_result, op0, op1);
@@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
           break;
 
         case TFmode:
+        case IFmode:
+        case KFmode:
          cmp = (flag_finite_math_only && !flag_trapping_math)
            ? gen_tsttfeq_gpr (compare_result2, op0, op1)
            : gen_cmptfeq_gpr (compare_result2, op0, op1);
@@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
 
       emit_insn (cmp);
     }
+
+  /* IEEE 128-bit support in VSX registers.  The comparison function (__cmpkf2)
+     returns 0..15 that is laid out the same way as the PowerPC CR register
+     would for a normal floating point comparison.  */
+  else if (FLOAT128_IEEE_P (mode))
+    {
+      rtx and_reg = gen_reg_rtx (SImode);
+      rtx dest = gen_reg_rtx (SImode);
+      rtx libfunc = optab_libfunc (cmp_optab, mode);
+      HOST_WIDE_INT mask_value = 0;
+
+      /* Values that __cmpkf2 returns.  */
+#define PPC_CMP_UNORDERED    0x1   /* isnan (a) || isnan (b).  */
+#define PPC_CMP_EQUAL        0x2   /* a == b.  */
+#define PPC_CMP_GREATER_THEN 0x4   /* a > b.  */
+#define PPC_CMP_LESS_THEN    0x8   /* a < b.  */
+
+      switch (code)
+        {
+        case EQ:
+          mask_value = PPC_CMP_EQUAL;
+          code = NE;
+          break;
+
+        case NE:
+          mask_value = PPC_CMP_EQUAL;
+          code = EQ;
+          break;
+
+        case GT:
+          mask_value = PPC_CMP_GREATER_THEN;
+          code = NE;
+          break;
+
+        case GE:
+          mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+          code = NE;
+          break;
+
+        case LT:
+          mask_value = PPC_CMP_LESS_THEN;
+          code = NE;
+          break;
+
+        case LE:
+          mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+          code = NE;
+          break;
+
+        case UNLE:
+          mask_value = PPC_CMP_GREATER_THEN;
+          code = EQ;
+          break;
+
+        case UNLT:
+          mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+          code = EQ;
+          break;
+
+        case UNGE:
+          mask_value = PPC_CMP_LESS_THEN;
+          code = EQ;
+          break;
+
+        case UNGT:
+          mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+          code = EQ;
+          break;
+
+        case UNEQ:
+          mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+          code = NE;
+
+        case LTGT:
+          mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+          code = EQ;
+          break;
+
+        case UNORDERED:
+          mask_value = PPC_CMP_UNORDERED;
+          code = NE;
+          break;
+
+        case ORDERED:
+          mask_value = PPC_CMP_UNORDERED;
+          code = EQ;
+          break;
+
+        default:
+          gcc_unreachable ();
+        }
+
+      gcc_assert (mask_value != 0);
+      and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
+                                         op0, mode, op1, mode);
+
+      emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
+      compare_result = gen_reg_rtx (CCmode);
+      comp_mode = CCmode;
+
+      emit_insn (gen_rtx_SET (compare_result,
+                              gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
+    }
+
   else
     {
       /* Generate XLC-compatible TFmode compare as PARALLEL with extra
          CLOBBERs to match cmptf_internal2 pattern.  */
       if (comp_mode == CCFPmode && TARGET_XL_COMPAT
-          && GET_MODE (op0) == TFmode
-          && !TARGET_IEEEQUAD
-          && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+          && FLOAT128_IBM_P (GET_MODE (op0))
+          && TARGET_HARD_FLOAT && TARGET_FPRS)
         emit_insn (gen_rtx_PARALLEL (VOIDmode,
                    gen_rtvec (10,
                               gen_rtx_SET (compare_result,
@@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
   /* Some kinds of FP comparisons need an OR operation;
      under flag_finite_math_only we don't bother.  */
   if (FLOAT_MODE_P (mode)
+      && !FLOAT128_IEEE_P (mode)
      && !flag_finite_math_only
      && !(TARGET_HARD_FLOAT && !TARGET_FPRS)
      && (code == LE || code == GE
@@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
 }
 
+/* Expand floating point conversion to/from __float128 and __ibm128.  */
+
+void
+rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
+{
+  machine_mode dest_mode = GET_MODE (dest);
+  machine_mode src_mode = GET_MODE (src);
+  convert_optab cvt = unknown_optab;
+  rtx libfunc = NULL_RTX;
+  rtx dest2;
+
+  if (dest_mode == src_mode)
+    gcc_unreachable ();
+
+  if (FLOAT128_IEEE_P (dest_mode))
+    {
+      if (src_mode == SFmode
+          || src_mode == DFmode
+          || FLOAT128_IBM_P (src_mode))
+        cvt = sext_optab;
+
+      else if (GET_MODE_CLASS (src_mode) == MODE_INT)
+        cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+
+      else if (FLOAT128_IEEE_P (src_mode))
+        emit_move_insn (dest, gen_lowpart (dest_mode, src));
+
+      else
+        gcc_unreachable ();
+    }
+
+  else if (FLOAT128_IEEE_P (src_mode))
+    {
+      if (dest_mode == SFmode
+          || dest_mode == DFmode
+          || FLOAT128_IBM_P (dest_mode))
+        cvt = trunc_optab;
+
+      else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
+        cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+
+      else
+        gcc_unreachable ();
+    }
+
+  else
+    gcc_unreachable ();
+
+  gcc_assert (cvt != unknown_optab);
+  libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
+  gcc_assert (libfunc != NULL_RTX);
+
+  dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
+                                   src_mode);
+
+  gcc_assert (dest != NULL_RTX);
+  if (!rtx_equal_p (dest, dest2))
+    emit_move_insn (dest, dest2);
+
+  return;
+}
+
 /* Emit the RTL for an sISEL pattern.  */
 
 void
@@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
          || ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
              && decl
              && !DECL_EXTERNAL (decl)
+             && !DECL_WEAK (decl)
              && (*targetm.binds_local_p) (decl))
          || (DEFAULT_ABI == ABI_V4
              && (!TARGET_SECURE_PLT
@@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
   { "crypto",                  OPTION_MASK_CRYPTO,             false, true  },
   { "direct-move",             OPTION_MASK_DIRECT_MOVE,        false, true  },
   { "dlmzb",                   OPTION_MASK_DLMZB,              false, true  },
+  { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
+                               false, true  },
   { "fprnd",                   OPTION_MASK_FPRND,              false, true  },
   { "hard-dfp",                OPTION_MASK_DFP,                false, true  },
   { "htm",                     OPTION_MASK_HTM,                false, true  },
@@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base
   /* A nonzero value indicates what kind of special handling for this
      insn is required if doublewords are swapped.  Undefined if
      is_swappable is not set.  */
-  unsigned int special_handling : 3;
+  unsigned int special_handling : 4;
   /* Set if the web represented by this entry cannot be optimized.  */
   unsigned int web_not_optimizable : 1;
   /* Set if this insn should be deleted.  */
@@ -34800,7 +35001,9 @@ enum special_handling_values {
   SH_NOSWAP_LD,
   SH_NOSWAP_ST,
   SH_EXTRACT,
-  SH_SPLAT
+  SH_SPLAT,
+  SH_XXPERMDI,
+  SH_CONCAT
 };
 
 /* Union INSN with all insns containing definitions that reach USE.
@@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
            *special = SH_EXTRACT;
            return 1;
          }
+       /* An XXPERMDI is ok if we adjust the lanes.  Note that if the
+          XXPERMDI is a swap operation, it will be identified by
+          insn_is_swap_p and therefore we won't get here.  */
+       else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+                && (GET_MODE (XEXP (op, 0)) == V4DFmode
+                    || GET_MODE (XEXP (op, 0)) == V4DImode)
+                && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+                && XVECLEN (parallel, 0) == 2
+                && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+                && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+         {
+           *special = SH_XXPERMDI;
+           return 1;
+         }
       else
        return 0;
@@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
      return 1;
    }
 
+  /* A concatenation of two doublewords is ok if we reverse the
+     order of the inputs.  */
+  if (GET_CODE (body) == SET
+      && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+      && (GET_MODE (SET_SRC (body)) == V2DFmode
+          || GET_MODE (SET_SRC (body)) == V2DImode))
+    {
+      *special = SH_CONCAT;
+      return 1;
+    }
+
   /* Otherwise check the operands for vector lane violations.  */
   return rtx_is_swappable_p (body, special);
 }
@@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn)
     fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
 }
 
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+   swap), reverse the order of the source operands and adjust the indices
+   of the source lanes to account for doubleword reversal.  */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx select = XEXP (set, 1);
+  rtx concat = XEXP (select, 0);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  rtx parallel = XEXP (select, 1);
+  int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+  int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+  int new_lane0 = 3 - lane1;
+  int new_lane1 = 3 - lane0;
+  XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+  XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+   reverse the order of those inputs.  */
+static void
+adjust_concat (rtx_insn *insn)
+{
+  rtx set = PATTERN (insn);
+  rtx concat = XEXP (set, 1);
+  rtx src0 = XEXP (concat, 0);
+  XEXP (concat, 0) = XEXP (concat, 1);
+  XEXP (concat, 1) = src0;
+  INSN_CODE (insn) = -1; /* Force re-recognition.  */
+  df_insn_rescan (insn);
+
+  if (dump_file)
+    fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
 /* The insn described by INSN_ENTRY[I] can be swapped, but only with
    special handling.  Take care of that here.  */
 static void
@@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
      /* Change the lane on a direct-splat operation.  */
      adjust_splat (insn);
      break;
+    case SH_XXPERMDI:
+      /* Change the lanes on an XXPERMDI operation.  */
+      adjust_xxpermdi (insn);
+      break;
+    case SH_CONCAT:
+      /* Reverse the order of a concatenation operation.  */
+      adjust_concat (insn);
+      break;
    }
 }
 
@@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
            fputs ("special:extract ", dump_file);
          else if (insn_entry[i].special_handling == SH_SPLAT)
            fputs ("special:splat ", dump_file);
+         else if (insn_entry[i].special_handling == SH_XXPERMDI)
+           fputs ("special:xxpermdi ", dump_file);
+         else if (insn_entry[i].special_handling == SH_CONCAT)
+           fputs ("special:concat ", dump_file);
        }
      if (insn_entry[i].web_not_optimizable)
        fputs ("unoptimizable ", dump_file);
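
The new FLOAT128_IEEE_P branch of rs6000_generate_compare above calls the soft-float comparison routine for the mode (the comment names __cmpkf2), treats the returned SImode value as a CR-style bit mask, ANDs it with the PPC_CMP_* value chosen for the comparison code, and then tests the result against zero with either an NE or an EQ sense. Below is a minimal standalone sketch of that decoding, assuming only the PPC_CMP_* layout shown in the hunk; the helper functions and the driver are illustrative and are not part of rs6000.c.

#include <stdio.h>

/* Bit layout copied from the patch's PPC_CMP_* macros.  */
#define PPC_CMP_UNORDERED    0x1   /* isnan (a) || isnan (b).  */
#define PPC_CMP_EQUAL        0x2   /* a == b.  */
#define PPC_CMP_GREATER_THEN 0x4   /* a > b.  */
#define PPC_CMP_LESS_THEN    0x8   /* a < b.  */

/* GE decodes as (result & (GREATER | EQUAL)) != 0, i.e. the patch picks
   mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL with code = NE.  */
static int
cr_result_is_ge (int cr_result)
{
  return (cr_result & (PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL)) != 0;
}

/* UNLE (unordered or <=) uses the complementary test: the same bit as
   GT but with code = EQ, i.e. (result & GREATER) == 0.  */
static int
cr_result_is_unle (int cr_result)
{
  return (cr_result & PPC_CMP_GREATER_THEN) == 0;
}

int
main (void)
{
  /* 0x4 is the documented result for a > b: GE holds, UNLE does not.  */
  printf ("a > b:     ge=%d unle=%d\n",
          cr_result_is_ge (0x4), cr_result_is_unle (0x4));
  /* 0x1 is the documented result for an unordered (NaN) operand pair.  */
  printf ("unordered: ge=%d unle=%d\n",
          cr_result_is_ge (0x1), cr_result_is_unle (0x1));
  return 0;
}

Because each UN* code is decoded as the inverted test of its ordered counterpart, the unordered bit only has to be examined explicitly for UNEQ, LTGT, UNORDERED and ORDERED, exactly as in the switch above.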
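
In the swap-optimization hunks above, a non-swap XXPERMDI over a V4DF/V4DI VEC_CONCAT stays valid by exchanging the two concatenated sources and remapping the selector lanes to (3 - lane1, 3 - lane0), since after the pass the source registers, the concatenation order, and the destination are all held with their doublewords reversed. Below is a small self-contained check of that index arithmetic; the arrays and the test harness are hypothetical, only the lane formula is taken from adjust_xxpermdi.

#include <assert.h>
#include <stdio.h>

int
main (void)
{
  /* Doublewords of the two source vectors, in architectural order.  */
  int src0[2] = { 10, 11 }, src1[2] = { 20, 21 };

  for (int lane0 = 0; lane0 < 4; lane0++)
    for (int lane1 = 0; lane1 < 4; lane1++)
      {
        /* What the original xxpermdi selects from { src0, src1 }.  */
        int cat[4] = { src0[0], src0[1], src1[0], src1[1] };
        int want0 = cat[lane0], want1 = cat[lane1];

        /* After the swap optimization each register stores its doublewords
           reversed and adjust_xxpermdi exchanges the concat operands, so
           the element that sat at index i now sits at index 3 - i.  */
        int swapped_cat[4] = { src1[1], src1[0], src0[1], src0[0] };
        int new_lane0 = 3 - lane1, new_lane1 = 3 - lane0;

        /* The destination is itself doubleword-swapped, so the first
           selected element must be the second element of the real result
           and vice versa.  */
        int got1 = swapped_cat[new_lane0], got0 = swapped_cat[new_lane1];
        assert (got0 == want0 && got1 == want1);
      }

  puts ("lane remapping holds for all 16 selector pairs");
  return 0;
}

adjust_concat needs no index arithmetic at all: with the output held doubleword-swapped, simply exchanging the two VEC_CONCAT operands reproduces the swapped form of the original result.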