author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 17:20:51 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 17:20:51 +0000
commit     2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree       259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config/rs6000/rs6000.c
parent     c8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9 svn merge -r227001:227400 ^/trunk ; there is some gengtype issue before svn r228000... }}

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000/rs6000.c')
 -rw-r--r--  gcc/config/rs6000/rs6000.c  | 343
 1 file changed, 313 insertions, 30 deletions
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2a969782f26..8107bec8e6e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
error ("-mfloat128-software requires VSX support");
+ /* Set -mallow-movmisalign explicitly to on if we have full ISA 2.07
+ support. If we only have ISA 2.06 support, and the user did not specify
+ the switch, leave it set to -1 so the movmisalign patterns are enabled,
+ but we don't enable the full vectorization support. */
+ if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
+ TARGET_ALLOW_MOVMISALIGN = 1;
+
+ else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
+ {
+ if (TARGET_ALLOW_MOVMISALIGN > 0)
+ error ("-mallow-movmisalign requires -mvsx");
+
+ TARGET_ALLOW_MOVMISALIGN = 0;
+ }
+
+ /* Determine when unaligned vector accesses are permitted, and when
+ they are preferred over masked Altivec loads. Note that if
+ TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
+ TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
+ not true. */
+ if (TARGET_EFFICIENT_UNALIGNED_VSX)
+ {
+ if (!TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mvsx");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+
+ else if (!TARGET_ALLOW_MOVMISALIGN)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
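As an aside (not from the diff itself): with these checks, explicitly requesting an option without its prerequisite is rejected during option override. Assuming a compiler built with this patch, invocations along these lines would be diagnosed with the error strings added above:

  gcc -mno-vsx -mallow-movmisalign test.c
      error: -mallow-movmisalign requires -mvsx
  gcc -mvsx -mno-allow-movmisalign -mefficient-unaligned-vsx test.c
      error: -mefficient-unaligned-vsx requires -mallow-movmisalign

When -mallow-movmisalign is left unspecified it stays at -1, and the hunk above promotes it to on only when TARGET_P8_VECTOR and TARGET_DIRECT_MOVE (full ISA 2.07 support) are both available.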
@@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p)
}
}
- /* Determine when unaligned vector accesses are permitted, and when
- they are preferred over masked Altivec loads. Note that if
- TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
- TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
- not true. */
- if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
- if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
- && TARGET_ALLOW_MOVMISALIGN != 0)
- TARGET_EFFICIENT_UNALIGNED_VSX = 1;
- else
- TARGET_EFFICIENT_UNALIGNED_VSX = 0;
- }
-
- if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
- TARGET_ALLOW_MOVMISALIGN = 1;
-
/* Set the builtin mask of the various options used that could affect which
builtins were used. In the past we used target_flags, but we've run out
of bits, and some options like SPE and PAIRED are no longer in
@@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
during expand. */
gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
- /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
{
@@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
{
unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+ bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
+ bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
/* Don't allow 64-bit types to overlap with 128-bit types that take a
single register under VSX because the scalar part of the register
@@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from,
IEEE floating point can't overlap, and neither can small
values. */
- if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ if (to_float128_vector_p && from_float128_vector_p)
+ return false;
+
+ else if (to_float128_vector_p || from_float128_vector_p)
return true;
/* TDmode in floating-mode registers must always go into a register
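(Aside, not from the diff: as I read FLOAT128_VECTOR_P, the new test allows a mode change between two IEEE 128-bit modes that each occupy a single vector register — for example KFmode and TFmode when long double is IEEE — while any change between such a mode and any other mode is still rejected.)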
@@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
if (TARGET_E500_DOUBLE
&& ((((to) == DFmode) + ((from) == DFmode)) == 1
|| (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == IFmode) + ((from) == IFmode)) == 1
+ || (((to) == KFmode) + ((from) == KFmode)) == 1
|| (((to) == DDmode) + ((from) == DDmode)) == 1
|| (((to) == TDmode) + ((from) == TDmode)) == 1
|| (((to) == DImode) + ((from) == DImode)) == 1))
@@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[])
return output_vec_const_move (operands);
}
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\n===== Bad 128 bit move:\n");
- debug_rtx (gen_rtx_SET (dest, src));
- }
-
- gcc_unreachable ();
+ fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move. */
@@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result, op0, op1)
: gen_cmptfeq_gpr (compare_result, op0, op1);
@@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfgt_gpr (compare_result, op0, op1)
: gen_cmptfgt_gpr (compare_result, op0, op1);
@@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttflt_gpr (compare_result, op0, op1)
: gen_cmptflt_gpr (compare_result, op0, op1);
@@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result2, op0, op1)
: gen_cmptfeq_gpr (compare_result2, op0, op1);
@@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
emit_insn (cmp);
}
+
+ /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2)
+ returns 0..15 that is laid out the same way as the PowerPC CR register
+ would for a normal floating point comparison. */
+ else if (FLOAT128_IEEE_P (mode))
+ {
+ rtx and_reg = gen_reg_rtx (SImode);
+ rtx dest = gen_reg_rtx (SImode);
+ rtx libfunc = optab_libfunc (cmp_optab, mode);
+ HOST_WIDE_INT mask_value = 0;
+
+ /* Values that __cmpkf2 returns. */
+#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
+#define PPC_CMP_EQUAL 0x2 /* a == b. */
+#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
+#define PPC_CMP_LESS_THEN 0x8 /* a < b. */
+
+ switch (code)
+ {
+ case EQ:
+ mask_value = PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case NE:
+ mask_value = PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case GT:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = NE;
+ break;
+
+ case GE:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case LT:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = NE;
+ break;
+
+ case LE:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case UNLE:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = EQ;
+ break;
+
+ case UNLT:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNGE:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = EQ;
+ break;
+
+ case UNGT:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNEQ:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case LTGT:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ case UNORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case ORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (mask_value != 0);
+ and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
+ op0, mode, op1, mode);
+
+ emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
+ compare_result = gen_reg_rtx (CCmode);
+ comp_mode = CCmode;
+
+ emit_insn (gen_rtx_SET (compare_result,
+ gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
+ }
+
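As an aside (not part of the patch): the switch above reduces every IEEE 128-bit comparison to a single library call followed by an AND of the returned mask. A minimal C model of the encoding described in the comment, using long double merely as a stand-in for __float128:

  /* Hypothetical model of the libgcc comparison routine named in the
     comment (__cmpkf2); it only mirrors the 4-bit result encoding
     assumed by the switch statement above.  */
  static int
  cmpkf2_model (long double a, long double b)
  {
    if (a != a || b != b)
      return 0x1;                  /* PPC_CMP_UNORDERED: a NaN operand.  */
    if (a == b)
      return 0x2;                  /* PPC_CMP_EQUAL.  */
    return (a > b) ? 0x4 : 0x8;    /* PPC_CMP_GREATER_THEN : PPC_CMP_LESS_THEN.  */
  }

With this encoding, a >= b is emitted as (__cmpkf2 (a, b) & (0x4 | 0x2)) != 0, matching the GE case, while the unordered-or-less-or-equal test UNLE becomes (__cmpkf2 (a, b) & 0x4) == 0, exactly as the mask/code pairs above express.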
else
{
/* Generate XLC-compatible TFmode compare as PARALLEL with extra
CLOBBERs to match cmptf_internal2 pattern. */
if (comp_mode == CCFPmode && TARGET_XL_COMPAT
- && GET_MODE (op0) == TFmode
- && !TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+ && FLOAT128_IBM_P (GET_MODE (op0))
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
emit_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (10,
gen_rtx_SET (compare_result,
@@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
/* Some kinds of FP comparisons need an OR operation;
under flag_finite_math_only we don't bother. */
if (FLOAT_MODE_P (mode)
+ && !FLOAT128_IEEE_P (mode)
&& !flag_finite_math_only
&& !(TARGET_HARD_FLOAT && !TARGET_FPRS)
&& (code == LE || code == GE
@@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
}
+/* Expand floating point conversion to/from __float128 and __ibm128. */
+
+void
+rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
+{
+ machine_mode dest_mode = GET_MODE (dest);
+ machine_mode src_mode = GET_MODE (src);
+ convert_optab cvt = unknown_optab;
+ rtx libfunc = NULL_RTX;
+ rtx dest2;
+
+ if (dest_mode == src_mode)
+ gcc_unreachable ();
+
+ if (FLOAT128_IEEE_P (dest_mode))
+ {
+ if (src_mode == SFmode
+ || src_mode == DFmode
+ || FLOAT128_IBM_P (src_mode))
+ cvt = sext_optab;
+
+ else if (GET_MODE_CLASS (src_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ emit_move_insn (dest, gen_lowpart (dest_mode, src));
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ if (dest_mode == SFmode
+ || dest_mode == DFmode
+ || FLOAT128_IBM_P (dest_mode))
+ cvt = trunc_optab;
+
+ else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+
+ else
+ gcc_unreachable ();
+ }
+
+ else
+ gcc_unreachable ();
+
+ gcc_assert (cvt != unknown_optab);
+ libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
+ gcc_assert (libfunc != NULL_RTX);
+
+ dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
+ src_mode);
+
+ gcc_assert (dest2 != NULL_RTX);
+ if (!rtx_equal_p (dest, dest2))
+ emit_move_insn (dest, dest2);
+
+ return;
+}
+
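As an aside (not part of the patch): each non-trivial case handled by rs6000_expand_float128_convert ends up as a library call looked up through convert_optab_libfunc; only an IEEE-to-IEEE conversion (KFmode <-> TFmode) degenerates into a plain move via gen_lowpart. A sketch of the C-level conversions that reach the four optab cases, assuming a compiler configured with __float128 support (the libgcc routine names are assumptions, not taken from this diff):

  __float128 from_double (double d)     { return d; }  /* sext_optab, e.g. __extenddfkf2  */
  double     to_double   (__float128 q) { return q; }  /* trunc_optab, e.g. __trunckfdf2  */
  __float128 from_int    (int i)        { return i; }  /* sfloat_optab                    */
  unsigned   to_unsigned (__float128 q) { return q; }  /* ufix_optab                      */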
/* Emit the RTL for an sISEL pattern. */
void
@@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
|| ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
+ && !DECL_WEAK (decl)
&& (*targetm.binds_local_p) (decl))
|| (DEFAULT_ABI == ABI_V4
&& (!TARGET_SECURE_PLT
@@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
+ { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
+ false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
@@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base
/* A nonzero value indicates what kind of special handling for this
insn is required if doublewords are swapped. Undefined if
is_swappable is not set. */
- unsigned int special_handling : 3;
+ unsigned int special_handling : 4;
/* Set if the web represented by this entry cannot be optimized. */
unsigned int web_not_optimizable : 1;
/* Set if this insn should be deleted. */
@@ -34800,7 +35001,9 @@ enum special_handling_values {
SH_NOSWAP_LD,
SH_NOSWAP_ST,
SH_EXTRACT,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_CONCAT
};
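(Aside, not from the diff: the special_handling bit-field above is widened from 3 to 4 bits because the two new enumerators push the number of special_handling_values past the eight that fit in three bits.)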
/* Union INSN with all insns containing definitions that reach USE.
@@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
*special = SH_EXTRACT;
return 1;
}
+ /* An XXPERMDI is ok if we adjust the lanes. Note that if the
+ XXPERMDI is a swap operation, it will be identified by
+ insn_is_swap_p and therefore we won't get here. */
+ else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+ && (GET_MODE (XEXP (op, 0)) == V4DFmode
+ || GET_MODE (XEXP (op, 0)) == V4DImode)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 2
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+ && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+ {
+ *special = SH_XXPERMDI;
+ return 1;
+ }
else
return 0;
@@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
return 1;
}
+ /* A concatenation of two doublewords is ok if we reverse the
+ order of the inputs. */
+ if (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+ && (GET_MODE (SET_SRC (body)) == V2DFmode
+ || GET_MODE (SET_SRC (body)) == V2DImode))
+ {
+ *special = SH_CONCAT;
+ return 1;
+ }
+
/* Otherwise check the operands for vector lane violations. */
return rtx_is_swappable_p (body, special);
}
@@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn)
fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+ swap), reverse the order of the source operands and adjust the indices
+ of the source lanes to account for doubleword reversal. */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx select = XEXP (set, 1);
+ rtx concat = XEXP (select, 0);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ rtx parallel = XEXP (select, 1);
+ int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+ int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+ int new_lane0 = 3 - lane1;
+ int new_lane1 = 3 - lane0;
+ XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+ XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
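A worked example (mine, not from the patch): under this optimization each V2DF/V2DI register holds its two doublewords reversed, so the reversed concat (B, A) holds the elements {B1, B0, A1, A0}, and what the original xxpermdi called lane i now sits at index 3 - i. Because the result register must itself be kept doubleword-swapped, the two selected lanes also trade places; selecting lanes (0, 2) from concat (A, B) therefore becomes selecting lanes (3 - 2, 3 - 0) = (1, 3) from concat (B, A), which is exactly what new_lane0 = 3 - lane1 and new_lane1 = 3 - lane0 compute.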
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+ reverse the order of those inputs. */
+static void
+adjust_concat (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx concat = XEXP (set, 1);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
/* The insn described by INSN_ENTRY[I] can be swapped, but only
with special handling. Take care of that here. */
static void
@@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
/* Change the lane on a direct-splat operation. */
adjust_splat (insn);
break;
+ case SH_XXPERMDI:
+ /* Change the lanes on an XXPERMDI operation. */
+ adjust_xxpermdi (insn);
+ break;
+ case SH_CONCAT:
+ /* Reverse the order of a concatenation operation. */
+ adjust_concat (insn);
+ break;
}
}
@@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
fputs ("special:extract ", dump_file);
else if (insn_entry[i].special_handling == SH_SPLAT)
fputs ("special:splat ", dump_file);
+ else if (insn_entry[i].special_handling == SH_XXPERMDI)
+ fputs ("special:xxpermdi ", dump_file);
+ else if (insn_entry[i].special_handling == SH_CONCAT)
+ fputs ("special:concat ", dump_file);
}
if (insn_entry[i].web_not_optimizable)
fputs ("unoptimizable ", dump_file);