author     bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 17:20:51 +0000
committer  bstarynk <bstarynk@138bc75d-0d04-0410-961f-82ee72b054a4>  2016-02-10 17:20:51 +0000
commit     2d9d01985a7a7866916fafa19c5c296702e69714 (patch)
tree       259c095c65fc0c6279b7a17755b3f851f51babb3 /gcc/config/rs6000
parent     c8ebeb0e3c6b093e649592be7d51d1c0032a1dc7 (diff)
2016-02-10 Basile Starynkevitch <basile@starynkevitch.net>
{{merging with even more of GCC 6, using subversion 1.9: svn merge -r227001:227400 ^/trunk ; there is some gengtype issue before svn r228000... }}

git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/melt-branch@233281 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r--  gcc/config/rs6000/altivec.h           |   2
-rw-r--r--  gcc/config/rs6000/predicates.md       |  31
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def  |   5
-rw-r--r--  gcc/config/rs6000/rs6000-c.c          |  21
-rw-r--r--  gcc/config/rs6000/rs6000-cpus.def     |   2
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h     |   1
-rw-r--r--  gcc/config/rs6000/rs6000.c            | 343
-rw-r--r--  gcc/config/rs6000/rs6000.md           | 458
-rw-r--r--  gcc/config/rs6000/rs6000.opt          |   2
-rw-r--r--  gcc/config/rs6000/sysv4.h             |   6
-rw-r--r--  gcc/config/rs6000/sysv4le.h           |   4
-rw-r--r--  gcc/config/rs6000/vector.md           |  23
12 files changed, 757 insertions, 141 deletions
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 3ef6bc85ecd..1c00099c78d 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -208,6 +208,8 @@
#define vec_lvebx __builtin_vec_lvebx
#define vec_lvehx __builtin_vec_lvehx
#define vec_lvewx __builtin_vec_lvewx
+#define vec_pmsum_be __builtin_vec_vpmsum
+#define vec_shasigma_be __builtin_crypto_vshasigma
/* Cell only intrinsics. */
#ifdef __PPU__
#define vec_lvlx __builtin_vec_lvlx
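
The two new <altivec.h> names expose the POWER8 carry-less multiply and SHA sigma instructions through the overloaded built-ins __builtin_vec_vpmsum and __builtin_crypto_vshasigma. A minimal usage sketch in C follows; the byte-element types for vec_pmsum_be come from the overload table added to rs6000-c.c further down, while the (vector, int, int) shape of vec_shasigma_be is an assumption based on the underlying vshasigmaw built-in. Compile with -mcpu=power8.

    #include <altivec.h>

    /* Sketch only: carry-less multiply-sum of byte elements.  Per the new
       overload table, the vpmsumb form takes two vector unsigned char
       operands and returns vector unsigned short.  */
    vector unsigned short
    clmul_bytes (vector unsigned char a, vector unsigned char b)
    {
      return vec_pmsum_be (a, b);
    }

    /* Sketch only: SHA sigma function via the crypto built-in; the two
       integer selector operands are an assumption and must be literals.  */
    vector unsigned int
    sha_sigma (vector unsigned int x)
    {
      return vec_shasigma_be (x, 0, 0);
    }
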
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index ae74796849d..3edb4774e75 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -239,6 +239,25 @@
return INT_REGNO_P (REGNO (op));
})
+;; Like int_reg_operand, but don't return true for pseudo registers
+(define_predicate "int_reg_operand_not_pseudo"
+ (match_operand 0 "register_operand")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ return 0;
+
+ return INT_REGNO_P (REGNO (op));
+})
+
;; Like int_reg_operand, but only return true for base registers
(define_predicate "base_reg_operand"
(match_operand 0 "int_reg_operand")
@@ -883,12 +902,12 @@
(define_predicate "current_file_function_operand"
(and (match_code "symbol_ref")
(match_test "(DEFAULT_ABI != ABI_AIX || SYMBOL_REF_FUNCTION_P (op))
- && ((SYMBOL_REF_LOCAL_P (op)
- && ((DEFAULT_ABI != ABI_AIX
- && DEFAULT_ABI != ABI_ELFv2)
- || !SYMBOL_REF_EXTERNAL_P (op)))
- || (op == XEXP (DECL_RTL (current_function_decl),
- 0)))")))
+ && (SYMBOL_REF_LOCAL_P (op)
+ || op == XEXP (DECL_RTL (current_function_decl), 0))
+ && !((DEFAULT_ABI == ABI_AIX
+ || DEFAULT_ABI == ABI_ELFv2)
+ && (SYMBOL_REF_EXTERNAL_P (op)
+ || SYMBOL_REF_WEAK (op)))")))
;; Return 1 if this operand is a valid input for a move insn.
(define_predicate "input_operand"
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 7beddf64d1b..85082ec0ee2 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1489,6 +1489,10 @@ BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpksdus)
+BU_P8V_AV_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
+BU_P8V_AV_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
+BU_P8V_AV_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
+BU_P8V_AV_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
@@ -1570,6 +1574,7 @@ BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
+BU_P8V_OVERLOAD_2 (VPMSUM, "vpmsum")
BU_P8V_OVERLOAD_2 (VRLD, "vrld")
BU_P8V_OVERLOAD_2 (VSLD, "vsld")
BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index d45bc93b10a..5fc2b53adfe 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -2937,6 +2937,14 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_MADD, VSX_BUILTIN_XVMADDDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
+ { ALTIVEC_BUILTIN_VEC_MADD, ALTIVEC_BUILTIN_VMLADDUHM,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
{ ALTIVEC_BUILTIN_VEC_MADDS, ALTIVEC_BUILTIN_VMHADDSHS,
RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI },
{ ALTIVEC_BUILTIN_VEC_MLADD, ALTIVEC_BUILTIN_VMLADDUHM,
@@ -4171,6 +4179,19 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMB,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMH,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMW,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VPMSUM, P8V_BUILTIN_VPMSUMD,
+ RS6000_BTI_unsigned_V1TI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
{ P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
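
The first rs6000-c.c hunk extends vec_madd to 16-bit integer vectors by mapping the new type combinations onto vmladduhm. A short sketch of what user code can now write (assumes -maltivec):

    #include <altivec.h>

    /* Sketch only: with the new overload entries, vec_madd also accepts
       halfword vectors and expands to vmladduhm (a*b+c per element),
       alongside the existing float and double forms.  */
    vector unsigned short
    mla_u16 (vector unsigned short a, vector unsigned short b,
             vector unsigned short c)
    {
      return vec_madd (a, b, c);
    }
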
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 9fd565286f2..03764aef740 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -53,6 +53,7 @@
| OPTION_MASK_P8_VECTOR \
| OPTION_MASK_CRYPTO \
| OPTION_MASK_DIRECT_MOVE \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_HTM \
| OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_QUAD_MEMORY_ATOMIC \
@@ -78,6 +79,7 @@
| OPTION_MASK_DFP \
| OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_DLMZB \
+ | OPTION_MASK_EFFICIENT_UNALIGNED_VSX \
| OPTION_MASK_FPRND \
| OPTION_MASK_HTM \
| OPTION_MASK_ISEL \
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index 7262a151438..7be529fab49 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -54,6 +54,7 @@ extern const char *output_vec_const_move (rtx *);
extern const char *rs6000_output_move_128bit (rtx *);
extern bool rs6000_move_128bit_ok_p (rtx []);
extern bool rs6000_split_128bit_ok_p (rtx []);
+extern void rs6000_expand_float128_convert (rtx, rtx, bool);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index 2a969782f26..8107bec8e6e 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -3716,6 +3716,45 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_FLOAT128 == FLOAT128_SW && !TARGET_VSX)
error ("-mfloat128-software requires VSX support");
+ /* Set -mallow-movmisalign to explicitly on if we have full ISA 2.07
+ support. If we only have ISA 2.06 support, and the user did not specify
+ the switch, leave it set to -1 so the movmisalign patterns are enabled,
+ but we don't enable the full vectorization support */
+ if (TARGET_ALLOW_MOVMISALIGN == -1 && TARGET_P8_VECTOR && TARGET_DIRECT_MOVE)
+ TARGET_ALLOW_MOVMISALIGN = 1;
+
+ else if (TARGET_ALLOW_MOVMISALIGN && !TARGET_VSX)
+ {
+ if (TARGET_ALLOW_MOVMISALIGN > 0)
+ error ("-mallow-movmisalign requires -mvsx");
+
+ TARGET_ALLOW_MOVMISALIGN = 0;
+ }
+
+ /* Determine when unaligned vector accesses are permitted, and when
+ they are preferred over masked Altivec loads. Note that if
+ TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
+ TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
+ not true. */
+ if (TARGET_EFFICIENT_UNALIGNED_VSX)
+ {
+ if (!TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mvsx");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+
+ else if (!TARGET_ALLOW_MOVMISALIGN)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_EFFICIENT_UNALIGNED_VSX)
+ error ("-mefficient-unaligned-vsx requires -mallow-movmisalign");
+
+ rs6000_isa_flags &= ~OPTION_MASK_EFFICIENT_UNALIGNED_VSX;
+ }
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@@ -4275,22 +4314,6 @@ rs6000_option_override_internal (bool global_init_p)
}
}
- /* Determine when unaligned vector accesses are permitted, and when
- they are preferred over masked Altivec loads. Note that if
- TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then
- TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is
- not true. */
- if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) {
- if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8
- && TARGET_ALLOW_MOVMISALIGN != 0)
- TARGET_EFFICIENT_UNALIGNED_VSX = 1;
- else
- TARGET_EFFICIENT_UNALIGNED_VSX = 0;
- }
-
- if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8)
- TARGET_ALLOW_MOVMISALIGN = 1;
-
/* Set the builtin mask of the various options used that could affect which
builtins were used. In the past we used target_flags, but we've run out
of bits, and some options like SPE and PAIRED are no longer in
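
Net effect of the two option hunks: -mcpu=power8 (full ISA 2.07) now enables both -mallow-movmisalign and -mefficient-unaligned-vsx by default, the latter through the new bit in the ISA 2.07 server mask, and the explicit-option error checking now runs before the ISA defaults are applied. As an exposition-only illustration (not taken from the patch), a loop over arrays with unknown alignment can then be vectorized with plain unaligned VSX accesses instead of peeling or permute-based realignment:

    /* Exposition only: vectorization candidate whose loads and stores are
       not known to be 16-byte aligned; with -mcpu=power8 the defaults set
       above let the vectorizer use unaligned VSX accesses directly.  */
    void
    scale (double *restrict a, const double *restrict b, long n)
    {
      for (long i = 0; i < n; i++)
        a[i] = 2.0 * b[i];
    }
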
@@ -8462,7 +8485,7 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode)
during expand. */
gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed);
- /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode,
+ /* Use V2DImode to do swaps of types with 128-bit scalar parts (TImode,
V1TImode). */
if (mode == TImode || mode == V1TImode)
{
@@ -18519,6 +18542,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
{
unsigned to_nregs = hard_regno_nregs[FIRST_FPR_REGNO][to];
unsigned from_nregs = hard_regno_nregs[FIRST_FPR_REGNO][from];
+ bool to_float128_vector_p = FLOAT128_VECTOR_P (to);
+ bool from_float128_vector_p = FLOAT128_VECTOR_P (from);
/* Don't allow 64-bit types to overlap with 128-bit types that take a
single register under VSX because the scalar part of the register
@@ -18527,7 +18552,10 @@ rs6000_cannot_change_mode_class (machine_mode from,
IEEE floating point can't overlap, and neither can small
values. */
- if (TARGET_IEEEQUAD && (to == TFmode || from == TFmode))
+ if (to_float128_vector_p && from_float128_vector_p)
+ return false;
+
+ else if (to_float128_vector_p || from_float128_vector_p)
return true;
/* TDmode in floating-mode registers must always go into a register
@@ -18555,6 +18583,8 @@ rs6000_cannot_change_mode_class (machine_mode from,
if (TARGET_E500_DOUBLE
&& ((((to) == DFmode) + ((from) == DFmode)) == 1
|| (((to) == TFmode) + ((from) == TFmode)) == 1
+ || (((to) == IFmode) + ((from) == IFmode)) == 1
+ || (((to) == KFmode) + ((from) == KFmode)) == 1
|| (((to) == DDmode) + ((from) == DDmode)) == 1
|| (((to) == TDmode) + ((from) == TDmode)) == 1
|| (((to) == DImode) + ((from) == DImode)) == 1))
@@ -18751,13 +18781,7 @@ rs6000_output_move_128bit (rtx operands[])
return output_vec_const_move (operands);
}
- if (TARGET_DEBUG_ADDR)
- {
- fprintf (stderr, "\n===== Bad 128 bit move:\n");
- debug_rtx (gen_rtx_SET (dest, src));
- }
-
- gcc_unreachable ();
+ fatal_insn ("Bad 128-bit move", gen_rtx_SET (dest, src));
}
/* Validate a 128-bit move. */
@@ -19801,6 +19825,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result, op0, op1)
: gen_cmptfeq_gpr (compare_result, op0, op1);
@@ -19828,6 +19854,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfgt_gpr (compare_result, op0, op1)
: gen_cmptfgt_gpr (compare_result, op0, op1);
@@ -19855,6 +19883,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttflt_gpr (compare_result, op0, op1)
: gen_cmptflt_gpr (compare_result, op0, op1);
@@ -19892,6 +19922,8 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
break;
case TFmode:
+ case IFmode:
+ case KFmode:
cmp = (flag_finite_math_only && !flag_trapping_math)
? gen_tsttfeq_gpr (compare_result2, op0, op1)
: gen_cmptfeq_gpr (compare_result2, op0, op1);
@@ -19914,14 +19946,117 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
emit_insn (cmp);
}
+
+ /* IEEE 128-bit support in VSX registers. The comparison function (__cmpkf2)
+ returns 0..15 that is laid out the same way as the PowerPC CR register
+ would for a normal floating point comparison. */
+ else if (FLOAT128_IEEE_P (mode))
+ {
+ rtx and_reg = gen_reg_rtx (SImode);
+ rtx dest = gen_reg_rtx (SImode);
+ rtx libfunc = optab_libfunc (cmp_optab, mode);
+ HOST_WIDE_INT mask_value = 0;
+
+ /* Values that __cmpkf2 returns. */
+#define PPC_CMP_UNORDERED 0x1 /* isnan (a) || isnan (b). */
+#define PPC_CMP_EQUAL 0x2 /* a == b. */
+#define PPC_CMP_GREATER_THEN 0x4 /* a > b. */
+#define PPC_CMP_LESS_THEN 0x8 /* a < b. */
+
+ switch (code)
+ {
+ case EQ:
+ mask_value = PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case NE:
+ mask_value = PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case GT:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = NE;
+ break;
+
+ case GE:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case LT:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = NE;
+ break;
+
+ case LE:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = NE;
+ break;
+
+ case UNLE:
+ mask_value = PPC_CMP_GREATER_THEN;
+ code = EQ;
+ break;
+
+ case UNLT:
+ mask_value = PPC_CMP_GREATER_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNGE:
+ mask_value = PPC_CMP_LESS_THEN;
+ code = EQ;
+ break;
+
+ case UNGT:
+ mask_value = PPC_CMP_LESS_THEN | PPC_CMP_EQUAL;
+ code = EQ;
+ break;
+
+ case UNEQ:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case LTGT:
+ mask_value = PPC_CMP_EQUAL | PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ case UNORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = NE;
+ break;
+
+ case ORDERED:
+ mask_value = PPC_CMP_UNORDERED;
+ code = EQ;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ gcc_assert (mask_value != 0);
+ and_reg = emit_library_call_value (libfunc, and_reg, LCT_CONST, SImode, 2,
+ op0, mode, op1, mode);
+
+ emit_insn (gen_andsi3 (dest, and_reg, GEN_INT (mask_value)));
+ compare_result = gen_reg_rtx (CCmode);
+ comp_mode = CCmode;
+
+ emit_insn (gen_rtx_SET (compare_result,
+ gen_rtx_COMPARE (comp_mode, dest, const0_rtx)));
+ }
+
else
{
/* Generate XLC-compatible TFmode compare as PARALLEL with extra
CLOBBERs to match cmptf_internal2 pattern. */
if (comp_mode == CCFPmode && TARGET_XL_COMPAT
- && GET_MODE (op0) == TFmode
- && !TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128)
+ && FLOAT128_IBM_P (GET_MODE (op0))
+ && TARGET_HARD_FLOAT && TARGET_FPRS)
emit_insn (gen_rtx_PARALLEL (VOIDmode,
gen_rtvec (10,
gen_rtx_SET (compare_result,
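
The library-call path above reduces every IEEE 128-bit comparison to one call of the comparison routine, an AND with a mask, and a compare of the result against zero. A hand-written equivalent of what the expander emits for a > b, using the __cmpkf2 name and result layout documented in the comment (the extern declaration is for illustration only; assumes __float128 support is enabled):

    /* Result bits of __cmpkf2, as documented in the patch.  */
    #define PPC_CMP_UNORDERED    0x1
    #define PPC_CMP_EQUAL        0x2
    #define PPC_CMP_GREATER_THEN 0x4
    #define PPC_CMP_LESS_THEN    0x8

    extern int __cmpkf2 (__float128, __float128);  /* illustration only */

    /* a > b: mask the "greater" bit and test for non-zero, i.e. the
       GT case in the switch above (mask_value = GREATER, code = NE).  */
    static int
    gt_kf (__float128 a, __float128 b)
    {
      return (__cmpkf2 (a, b) & PPC_CMP_GREATER_THEN) != 0;
    }
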
@@ -19954,6 +20089,7 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
/* Some kinds of FP comparisons need an OR operation;
under flag_finite_math_only we don't bother. */
if (FLOAT_MODE_P (mode)
+ && !FLOAT128_IEEE_P (mode)
&& !flag_finite_math_only
&& !(TARGET_HARD_FLOAT && !TARGET_FPRS)
&& (code == LE || code == GE
@@ -19993,6 +20129,68 @@ rs6000_generate_compare (rtx cmp, machine_mode mode)
}
+/* Expand floating point conversion to/from __float128 and __ibm128. */
+
+void
+rs6000_expand_float128_convert (rtx dest, rtx src, bool unsigned_p)
+{
+ machine_mode dest_mode = GET_MODE (dest);
+ machine_mode src_mode = GET_MODE (src);
+ convert_optab cvt = unknown_optab;
+ rtx libfunc = NULL_RTX;
+ rtx dest2;
+
+ if (dest_mode == src_mode)
+ gcc_unreachable ();
+
+ if (FLOAT128_IEEE_P (dest_mode))
+ {
+ if (src_mode == SFmode
+ || src_mode == DFmode
+ || FLOAT128_IBM_P (src_mode))
+ cvt = sext_optab;
+
+ else if (GET_MODE_CLASS (src_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufloat_optab : sfloat_optab;
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ emit_move_insn (dest, gen_lowpart (dest_mode, src));
+
+ else
+ gcc_unreachable ();
+ }
+
+ else if (FLOAT128_IEEE_P (src_mode))
+ {
+ if (dest_mode == SFmode
+ || dest_mode == DFmode
+ || FLOAT128_IBM_P (dest_mode))
+ cvt = trunc_optab;
+
+ else if (GET_MODE_CLASS (dest_mode) == MODE_INT)
+ cvt = (unsigned_p) ? ufix_optab : sfix_optab;
+
+ else
+ gcc_unreachable ();
+ }
+
+ else
+ gcc_unreachable ();
+
+ gcc_assert (cvt != unknown_optab);
+ libfunc = convert_optab_libfunc (cvt, dest_mode, src_mode);
+ gcc_assert (libfunc != NULL_RTX);
+
+ dest2 = emit_library_call_value (libfunc, dest, LCT_CONST, dest_mode, 1, src,
+ src_mode);
+
+ gcc_assert (dest != NULL_RTX);
+ if (!rtx_equal_p (dest, dest2))
+ emit_move_insn (dest, dest2);
+
+ return;
+}
+
/* Emit the RTL for an sISEL pattern. */
void
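
rs6000_expand_float128_convert selects a conversion optab (extend, truncate, fix, or float) and emits a call to the corresponding library routine. A sketch of the kind of source that reaches it; the __extenddfkf2 routine name is an assumption that merely follows the same kf-suffix convention as __cmpkf2 above:

    /* Sketch only: with software __float128 support, this widening is
       expanded by rs6000_expand_float128_convert into a call to the
       sext_optab libfunc (conventionally __extenddfkf2).  */
    __float128
    widen (double x)
    {
      return (__float128) x;
    }
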
@@ -22635,6 +22833,7 @@ rs6000_function_ok_for_sibcall (tree decl, tree exp)
|| ((DEFAULT_ABI == ABI_AIX || DEFAULT_ABI == ABI_ELFv2)
&& decl
&& !DECL_EXTERNAL (decl)
+ && !DECL_WEAK (decl)
&& (*targetm.binds_local_p) (decl))
|| (DEFAULT_ABI == ABI_V4
&& (!TARGET_SECURE_PLT
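
Both the predicates.md change and this one exclude weak symbols from the "local call" fast paths on the AIX and ELFv2 ABIs: a weak definition may be overridden at link time, so the call can end up in code that does not share the caller's TOC and therefore cannot be reached by a sibcall. An exposition-only source-level trigger:

    /* Exposition only: helper is weak, so another object may supply the
       definition actually called.  After this change the tail call in
       wrapper is no longer converted to a sibcall on AIX/ELFv2.  */
    __attribute__ ((weak)) int
    helper (int x)
    {
      return x + 1;
    }

    int
    wrapper (int x)
    {
      return helper (x);
    }
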
@@ -32921,6 +33120,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "crypto", OPTION_MASK_CRYPTO, false, true },
{ "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
+ { "efficient-unaligned-vsx", OPTION_MASK_EFFICIENT_UNALIGNED_VSX,
+ false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
{ "htm", OPTION_MASK_HTM, false, true },
@@ -34786,7 +34987,7 @@ class swap_web_entry : public web_entry_base
/* A nonzero value indicates what kind of special handling for this
insn is required if doublewords are swapped. Undefined if
is_swappable is not set. */
- unsigned int special_handling : 3;
+ unsigned int special_handling : 4;
/* Set if the web represented by this entry cannot be optimized. */
unsigned int web_not_optimizable : 1;
/* Set if this insn should be deleted. */
@@ -34800,7 +35001,9 @@ enum special_handling_values {
SH_NOSWAP_LD,
SH_NOSWAP_ST,
SH_EXTRACT,
- SH_SPLAT
+ SH_SPLAT,
+ SH_XXPERMDI,
+ SH_CONCAT
};
/* Union INSN with all insns containing definitions that reach USE.
@@ -34992,6 +35195,20 @@ rtx_is_swappable_p (rtx op, unsigned int *special)
*special = SH_EXTRACT;
return 1;
}
+ /* An XXPERMDI is ok if we adjust the lanes. Note that if the
+ XXPERMDI is a swap operation, it will be identified by
+ insn_is_swap_p and therefore we won't get here. */
+ else if (GET_CODE (XEXP (op, 0)) == VEC_CONCAT
+ && (GET_MODE (XEXP (op, 0)) == V4DFmode
+ || GET_MODE (XEXP (op, 0)) == V4DImode)
+ && GET_CODE ((parallel = XEXP (op, 1))) == PARALLEL
+ && XVECLEN (parallel, 0) == 2
+ && GET_CODE (XVECEXP (parallel, 0, 0)) == CONST_INT
+ && GET_CODE (XVECEXP (parallel, 0, 1)) == CONST_INT)
+ {
+ *special = SH_XXPERMDI;
+ return 1;
+ }
else
return 0;
@@ -35169,6 +35386,17 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn,
return 1;
}
+ /* A concatenation of two doublewords is ok if we reverse the
+ order of the inputs. */
+ if (GET_CODE (body) == SET
+ && GET_CODE (SET_SRC (body)) == VEC_CONCAT
+ && (GET_MODE (SET_SRC (body)) == V2DFmode
+ || GET_MODE (SET_SRC (body)) == V2DImode))
+ {
+ *special = SH_CONCAT;
+ return 1;
+ }
+
/* Otherwise check the operands for vector lane violations. */
return rtx_is_swappable_p (body, special);
}
@@ -35458,6 +35686,49 @@ adjust_splat (rtx_insn *insn)
fprintf (dump_file, "Changing lane for splat %d\n", INSN_UID (insn));
}
+/* Given OP that contains an XXPERMDI operation (that is not a doubleword
+ swap), reverse the order of the source operands and adjust the indices
+ of the source lanes to account for doubleword reversal. */
+static void
+adjust_xxpermdi (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx select = XEXP (set, 1);
+ rtx concat = XEXP (select, 0);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ rtx parallel = XEXP (select, 1);
+ int lane0 = INTVAL (XVECEXP (parallel, 0, 0));
+ int lane1 = INTVAL (XVECEXP (parallel, 0, 1));
+ int new_lane0 = 3 - lane1;
+ int new_lane1 = 3 - lane0;
+ XVECEXP (parallel, 0, 0) = GEN_INT (new_lane0);
+ XVECEXP (parallel, 0, 1) = GEN_INT (new_lane1);
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Changing lanes for xxpermdi %d\n", INSN_UID (insn));
+}
+
+/* Given OP that contains a VEC_CONCAT operation of two doublewords,
+ reverse the order of those inputs. */
+static void
+adjust_concat (rtx_insn *insn)
+{
+ rtx set = PATTERN (insn);
+ rtx concat = XEXP (set, 1);
+ rtx src0 = XEXP (concat, 0);
+ XEXP (concat, 0) = XEXP (concat, 1);
+ XEXP (concat, 1) = src0;
+ INSN_CODE (insn) = -1; /* Force re-recognition. */
+ df_insn_rescan (insn);
+
+ if (dump_file)
+ fprintf (dump_file, "Reversing inputs for concat %d\n", INSN_UID (insn));
+}
+
/* The insn described by INSN_ENTRY[I] can be swapped, but only
with special handling. Take care of that here. */
static void
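
A stand-alone restatement of the index arithmetic in adjust_xxpermdi (sketch only, not code from the patch): once the two concatenated doublewords have been exchanged, selecting original lanes (lane0, lane1) is the same as selecting (3 - lane1, 3 - lane0) from the swapped concatenation.

    #include <assert.h>

    /* Sketch only: the lane remapping performed by adjust_xxpermdi.  */
    static void
    remap_lanes (int lane0, int lane1, int *new_lane0, int *new_lane1)
    {
      *new_lane0 = 3 - lane1;
      *new_lane1 = 3 - lane0;
    }

    int
    main (void)
    {
      int a, b;
      remap_lanes (0, 3, &a, &b);   /* (0,3) is unchanged */
      assert (a == 0 && b == 3);
      remap_lanes (0, 2, &a, &b);   /* (0,2) becomes (1,3) */
      assert (a == 1 && b == 3);
      return 0;
    }
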
@@ -35504,6 +35775,14 @@ handle_special_swappables (swap_web_entry *insn_entry, unsigned i)
/* Change the lane on a direct-splat operation. */
adjust_splat (insn);
break;
+ case SH_XXPERMDI:
+ /* Change the lanes on an XXPERMDI operation. */
+ adjust_xxpermdi (insn);
+ break;
+ case SH_CONCAT:
+ /* Reverse the order of a concatenation operation. */
+ adjust_concat (insn);
+ break;
}
}
@@ -35576,6 +35855,10 @@ dump_swap_insn_table (swap_web_entry *insn_entry)
fputs ("special:extract ", dump_file);
else if (insn_entry[i].special_handling == SH_SPLAT)
fputs ("special:splat ", dump_file);
+ else if (insn_entry[i].special_handling == SH_XXPERMDI)
+ fputs ("special:xxpermdi ", dump_file);
+ else if (insn_entry[i].special_handling == SH_CONCAT)
+ fputs ("special:concat ", dump_file);
}
if (insn_entry[i].web_not_optimizable)
fputs ("unoptimizable ", dump_file);
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 527ad985423..cfdb286a2cb 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -348,6 +348,8 @@
&& TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_FLOAT128")
+ (KF "TARGET_FLOAT128")
(DD "TARGET_DFP")
(TD "TARGET_DFP")])
@@ -365,9 +367,14 @@
(define_mode_iterator FMOVE32 [SF SD])
(define_mode_iterator FMOVE64 [DF DD])
(define_mode_iterator FMOVE64X [DI DF DD])
-(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
+(define_mode_iterator FMOVE128 [(TF "TARGET_LONG_DOUBLE_128")
+ (IF "TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+(define_mode_iterator FMOVE128_FPR [(TF "FLOAT128_2REG_P (TFmode)")
+ (IF "FLOAT128_2REG_P (IFmode)")
+ (TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+
; Iterators for 128 bit types for direct move
(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
(V16QI "")
@@ -376,7 +383,13 @@
(V4SF "")
(V2DI "")
(V2DF "")
- (V1TI "")])
+ (V1TI "")
+ (KF "")
+ (TF "")
+ (IF "")])
+
+; Iterator for 128-bit VSX types for pack/unpack
+(define_mode_iterator FMOVE128_VSX [V1TI KF])
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
@@ -432,6 +445,25 @@
; Iterator for just SF/DF
(define_mode_iterator SFDF [SF DF])
+; Iterator for float128 floating conversions
+(define_mode_iterator FLOAT128_SFDFTF [
+ (SF "TARGET_FLOAT128")
+ (DF "TARGET_FLOAT128")
+ (TF "FLOAT128_IBM_P (TFmode)")
+ (IF "TARGET_FLOAT128")])
+
+; Iterator for special 128-bit floating point. This is for non-default
+; conversions, so TFmode is not used here.
+(define_mode_iterator IFKF [IF KF])
+
+; Iterator for 128-bit floating point that uses the IBM double-double format
+(define_mode_iterator IBM128 [IF TF])
+
+; Iterator for 128-bit floating point
+(define_mode_iterator TFIFKF [(KF "TARGET_FLOAT128")
+ (IF "TARGET_FLOAT128")
+ (TF "TARGET_LONG_DOUBLE_128")])
+
; SF/DF suffix for traditional floating instructions
(define_mode_attr Ftrad [(SF "s") (DF "")])
@@ -596,7 +628,7 @@
;; Reload iterator for creating the function to allocate a base register to
;; supplement addressing modes.
(define_mode_iterator RELOAD [V16QI V8HI V4SI V2DI V4SF V2DF V1TI
- SF SD SI DF DD DI TI PTI])
+ SF SD SI DF DD DI TI PTI KF IF TF])
;; Start with fixed-point load and store insns. Here we put only the more
@@ -3037,15 +3069,15 @@
(set_attr "dot" "yes")
(set_attr "length" "4,8")])
-(define_insn_and_split "*and<mode>3_imm_dot_shifted"
- [(set (match_operand:CC 3 "cc_reg_operand" "=x,?y")
+(define_insn "*and<mode>3_imm_dot_shifted"
+ [(set (match_operand:CC 3 "cc_reg_operand" "=x")
(compare:CC
(and:GPR
- (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r,r")
- (match_operand:SI 4 "const_int_operand" "n,n"))
- (match_operand:GPR 2 "const_int_operand" "n,n"))
+ (lshiftrt:GPR (match_operand:GPR 1 "gpc_reg_operand" "%r")
+ (match_operand:SI 4 "const_int_operand" "n"))
+ (match_operand:GPR 2 "const_int_operand" "n"))
(const_int 0)))
- (clobber (match_scratch:GPR 0 "=r,r"))]
+ (clobber (match_scratch:GPR 0 "=r"))]
"logical_const_operand (GEN_INT (UINTVAL (operands[2])
<< INTVAL (operands[4])),
DImode)
@@ -3054,23 +3086,10 @@
&& rs6000_gen_cell_microcode"
{
operands[2] = GEN_INT (UINTVAL (operands[2]) << INTVAL (operands[4]));
- if (which_alternative == 0)
- return "andi%e2. %0,%1,%u2";
- else
- return "#";
+ return "andi%e2. %0,%1,%u2";
}
- "&& reload_completed && cc_reg_not_cr0_operand (operands[3], CCmode)"
- [(set (match_dup 0)
- (and:GPR (lshiftrt:GPR (match_dup 1)
- (match_dup 4))
- (match_dup 2)))
- (set (match_dup 3)
- (compare:CC (match_dup 0)
- (const_int 0)))]
- ""
[(set_attr "type" "logical")
- (set_attr "dot" "yes")
- (set_attr "length" "4,8")])
+ (set_attr "dot" "yes")])
(define_insn "and<mode>3_mask"
@@ -3664,10 +3683,10 @@
; an insert instruction, in many cases.
(define_insn_and_split "*ior<mode>_mask"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
- (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
- (match_operand:GPR 2 "const_int_operand" "n")))]
- "can_create_pseudo_p ()
- && !logical_const_operand (operands[2], <MODE>mode)
+ (ior:GPR (match_operand:GPR 1 "gpc_reg_operand" "0")
+ (match_operand:GPR 2 "const_int_operand" "n")))
+ (clobber (match_scratch:GPR 3 "=r"))]
+ "!logical_const_operand (operands[2], <MODE>mode)
&& rs6000_is_valid_mask (operands[2], NULL, NULL, <MODE>mode)"
"#"
"&& 1"
@@ -3682,7 +3701,8 @@
{
int nb, ne;
rs6000_is_valid_mask (operands[2], &nb, &ne, <MODE>mode);
- operands[3] = gen_reg_rtx (<MODE>mode);
+ if (GET_CODE (operands[3]) == SCRATCH)
+ operands[3] = gen_reg_rtx (<MODE>mode);
operands[4] = GEN_INT (ne);
operands[5] = GEN_INT (~UINTVAL (operands[2]));
}
@@ -4216,19 +4236,18 @@
;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
;; builtins.c and optabs.c that are not correct for IBM long double
;; when little-endian.
-(define_expand "signbittf2"
+(define_expand "signbit<mode>2"
[(set (match_dup 2)
- (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+ (float_truncate:DF (match_operand:IBM128 1 "gpc_reg_operand" "")))
(set (match_dup 3)
(subreg:DI (match_dup 2) 0))
(set (match_dup 4)
(match_dup 5))
(set (match_operand:SI 0 "gpc_reg_operand" "")
(match_dup 6))]
- "!TARGET_IEEEQUAD
+ "FLOAT128_IBM_P (<MODE>mode)
&& TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)"
{
operands[2] = gen_reg_rtx (DFmode);
operands[3] = gen_reg_rtx (DImode);
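
The generalized signbit<mode>2 expander still handles IBM double-double by truncating to DFmode first: the sign of a double-double value is carried by its high double, and extracting it that way sidesteps the FLOAT_WORDS_BIGENDIAN issue mentioned in the comment. Exposition-only source that exercises it:

    /* Exposition only: with -mlong-double-128 and the IBM double-double
       format, this goes through the signbit<mode>2 expander above.  */
    int
    sign_of (long double x)
    {
      return __builtin_signbit (x);
    }
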
@@ -6402,9 +6421,10 @@
;; problematical. Don't allow direct move for this case.
(define_insn_and_split "*mov<mode>_64bit_dm"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r,r,wm")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r,wm,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_POWERPC64
+ && FLOAT128_2REG_P (<MODE>mode)
&& (<MODE>mode != TDmode || WORDS_BIG_ENDIAN)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -6427,9 +6447,12 @@
[(set_attr "length" "8,8,8,8,12,12,8")])
(define_insn_and_split "*mov<mode>_32bit"
- [(set (match_operand:FMOVE128 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
- (match_operand:FMOVE128 1 "input_operand" "d,m,d,j,r,jY,r"))]
+ [(set (match_operand:FMOVE128_FPR 0 "nonimmediate_operand" "=m,d,d,ws,Y,r,r")
+ (match_operand:FMOVE128_FPR 1 "input_operand" "d,m,d,j,r,jY,r"))]
"TARGET_HARD_FLOAT && TARGET_FPRS && !TARGET_POWERPC64
+ && (FLOAT128_2REG_P (<MODE>mode)
+ || int_reg_operand_not_pseudo (operands[0], <MODE>mode)
+ || int_reg_operand_not_pseudo (operands[1], <MODE>mode))
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
"#"
@@ -6453,12 +6476,12 @@
(define_expand "extenddftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:DF 1 "input_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ "TARGET_HARD_FLOAT && (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_extenddftf2 (operands[0], operands[1]));
else
emit_insn (gen_extenddftf2_fprs (operands[0], operands[1]));
@@ -6507,25 +6530,34 @@
(define_expand "extendsftf2"
[(set (match_operand:TF 0 "nonimmediate_operand" "")
(float_extend:TF (match_operand:SF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- rtx tmp = gen_reg_rtx (DFmode);
- emit_insn (gen_extendsfdf2 (tmp, operands[1]));
- emit_insn (gen_extenddftf2 (operands[0], tmp));
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else
+ {
+ rtx tmp = gen_reg_rtx (DFmode);
+ emit_insn (gen_extendsfdf2 (tmp, operands[1]));
+ emit_insn (gen_extenddftf2 (operands[0], tmp));
+ }
DONE;
})
(define_expand "trunctfdf2"
[(set (match_operand:DF 0 "gpc_reg_operand" "")
(float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
- "")
+{
+ if (TARGET_IEEEQUAD)
+ {
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+ }
+})
(define_insn_and_split "trunctfdf2_internal1"
[(set (match_operand:DF 0 "gpc_reg_operand" "=d,?d")
@@ -6556,12 +6588,13 @@
(define_expand "trunctfsf2"
[(set (match_operand:SF 0 "gpc_reg_operand" "")
(float_truncate:SF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE)
&& TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_trunctfsf2 (operands[0], operands[1]));
else
emit_insn (gen_trunctfsf2_fprs (operands[0], operands[1]));
@@ -6612,10 +6645,12 @@
(define_expand "fix_trunctfsi2"
[(set (match_operand:SI 0 "gpc_reg_operand" "")
(fix:SI (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD && TARGET_HARD_FLOAT
+ "TARGET_HARD_FLOAT
&& (TARGET_FPRS || TARGET_E500_DOUBLE) && TARGET_LONG_DOUBLE_128"
{
- if (TARGET_E500_DOUBLE)
+ if (TARGET_IEEEQUAD)
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ else if (TARGET_E500_DOUBLE)
emit_insn (gen_spe_fix_trunctfsi2 (operands[0], operands[1]));
else
emit_insn (gen_fix_trunctfsi2_fprs (operands[0], operands[1]));
@@ -6663,20 +6698,73 @@
DONE;
})
-(define_expand "negtf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (neg:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
- "")
+(define_expand "fix_trunctfdi2"
+ [(set (match_operand:DI 0 "nonimmediate_operand" "")
+ (fix:DI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunctf<mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:TF 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "floatditf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (float:TF (match_operand:DI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<mode>tf2"
+ [(set (match_operand:TF 0 "nonimmediate_operand" "")
+ (unsigned_float:TF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
+ "
+{
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ emit_insn (gen_ieee_128bit_vsx_neg<mode>2 (operands[0], operands[1]));
+ else
+ {
+ rtx libfunc = optab_libfunc (neg_optab, <MODE>mode);
+ rtx target = emit_library_call_value (libfunc, operands[0], LCT_CONST,
+ <MODE>mode, 1,
+ operands[1], <MODE>mode);
+
+ if (target && !rtx_equal_p (target, operands[0]))
+ emit_move_insn (operands[0], target);
+ }
+ DONE;
+ }
+}")
(define_insn "negtf2_internal"
[(set (match_operand:TF 0 "gpc_reg_operand" "=d")
(neg:TF (match_operand:TF 1 "gpc_reg_operand" "d")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_LONG_DOUBLE_128"
+ "TARGET_HARD_FLOAT && TARGET_FPRS && FLOAT128_IBM_P (TFmode)"
"*
{
if (REGNO (operands[0]) == REGNO (operands[1]) + 1)
@@ -6687,16 +6775,29 @@
[(set_attr "type" "fp")
(set_attr "length" "8")])
-(define_expand "abstf2"
- [(set (match_operand:TF 0 "gpc_reg_operand" "")
- (abs:TF (match_operand:TF 1 "gpc_reg_operand" "")))]
- "!TARGET_IEEEQUAD
- && TARGET_HARD_FLOAT
- && (TARGET_FPRS || TARGET_E500_DOUBLE)
- && TARGET_LONG_DOUBLE_128"
+(define_expand "abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "gpc_reg_operand" "")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "gpc_reg_operand" "")))]
+ "FLOAT128_IEEE_P (<MODE>mode)
+ || (FLOAT128_IBM_P (<MODE>mode)
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE))"
"
{
- rtx label = gen_label_rtx ();
+ rtx label;
+
+ if (FLOAT128_IEEE_P (<MODE>mode))
+ {
+ if (TARGET_FLOAT128)
+ {
+ emit_insn (gen_ieee_128bit_vsx_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+ }
+ else
+ FAIL;
+ }
+
+ label = gen_label_rtx ();
if (TARGET_E500_DOUBLE)
{
if (flag_finite_math_only && !flag_trapping_math)
@@ -6732,6 +6833,184 @@
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
operands[6] = simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word);
}")
+
+
+;; Generate IEEE 128-bit -0.0 (0x80000000000000000000000000000000) in a vector
+;; register
+
+(define_expand "ieee_128bit_negative_zero"
+ [(set (match_operand:V16QI 0 "register_operand" "") (match_dup 1))]
+ "TARGET_FLOAT128"
+{
+ rtvec v = rtvec_alloc (16);
+ int i, high;
+
+ for (i = 0; i < 16; i++)
+ RTVEC_ELT (v, i) = const0_rtx;
+
+ high = (BYTES_BIG_ENDIAN) ? 0 : 15;
+ RTVEC_ELT (v, high) = GEN_INT (0x80);
+
+ rs6000_expand_vector_init (operands[0], gen_rtx_PARALLEL (V16QImode, v));
+ DONE;
+})
+
+;; IEEE 128-bit negate
+
+;; We have 2 insns here for negate and absolute value. The first uses
+;; match_scratch so that phases like combine can recognize neg/abs as generic
+;; insns, and second insn after the first split pass loads up the bit to
+;; twiddle the sign bit. Later GCSE passes can then combine multiple uses of
+;; neg/abs to create the constant just once.
+
+(define_insn_and_split "ieee_128bit_vsx_neg<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (neg:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_neg<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlxor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit absolute value
+(define_insn_and_split "ieee_128bit_vsx_abs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_abs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (abs:TFIFKF (match_operand:TFIFKF 1 "register_operand" "wa")))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; IEEE 128-bit negative absolute value
+(define_insn_and_split "*ieee_128bit_vsx_nabs<mode>2"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (clobber (match_scratch:V16QI 2 "=v"))]
+ "TARGET_FLOAT128 && FLOAT128_IEEE_P (<MODE>mode)"
+ "#"
+ "&& 1"
+ [(parallel [(set (match_dup 0)
+ (abs:TFIFKF (match_dup 1)))
+ (use (match_dup 2))])]
+{
+ if (GET_CODE (operands[2]) == SCRATCH)
+ operands[2] = gen_reg_rtx (V16QImode);
+
+ operands[3] = gen_reg_rtx (V16QImode);
+ emit_insn (gen_ieee_128bit_negative_zero (operands[2]));
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "vecsimple")])
+
+(define_insn "*ieee_128bit_vsx_nabs<mode>2_internal"
+ [(set (match_operand:TFIFKF 0 "register_operand" "=wa")
+ (neg:TFIFKF
+ (abs:TFIFKF
+ (match_operand:TFIFKF 1 "register_operand" "wa"))))
+ (use (match_operand:V16QI 2 "register_operand" "=v"))]
+ "TARGET_FLOAT128"
+ "xxlor %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")])
+
+;; Float128 conversion functions. These expand to library function calls.
+
+(define_expand "extend<FLOAT128_SFDFTF:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float_extend:IFKF
+ (match_operand:FLOAT128_SFDFTF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "trunc<IFKF:mode><FLOAT128_SFDFTF:mode>2"
+ [(set (match_operand:FLOAT128_SFDFTF 0 "nonimmediate_operand" "")
+ (float_truncate:FLOAT128_SFDFTF
+ (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fix_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "fixuns_trunc<IFKF:mode><SDI:mode>2"
+ [(set (match_operand:SDI 0 "nonimmediate_operand" "")
+ (unsigned_fix:SDI (match_operand:IFKF 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
+(define_expand "float<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], false);
+ DONE;
+})
+
+(define_expand "floatuns<SDI:mode><IFKF:mode>2"
+ [(set (match_operand:IFKF 0 "nonimmediate_operand" "")
+ (unsigned_float:IFKF (match_operand:SDI 1 "gpc_reg_operand" "")))]
+ "TARGET_FLOAT128"
+{
+ rs6000_expand_float128_convert (operands[0], operands[1], true);
+ DONE;
+})
+
;; Reload helper functions used by rs6000_secondary_reload. The patterns all
;; must have 3 arguments, and scratch register constraint must be a single
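
The new xxlxor/xxlandc/xxlor patterns rely on IEEE binary128 negation, absolute value, and negated absolute value being pure sign-bit operations on the 16-byte value; ieee_128bit_negative_zero builds the one-bit mask (0x80 in byte 0 on big-endian, byte 15 on little-endian). A byte-level model of the negate case, exposition only and not how the compiler itself operates; it assumes __float128 is available:

    #include <string.h>

    /* Exposition only: IEEE binary128 negation is a single sign-bit flip,
       which is what the xxlxor pattern computes against the -0.0 mask
       built by ieee_128bit_negative_zero.  */
    static __float128
    neg_by_xor (__float128 x)
    {
      unsigned char b[16];
      memcpy (b, &x, sizeof b);
    #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
      b[15] ^= 0x80;      /* sign byte on a little-endian target */
    #else
      b[0] ^= 0x80;       /* sign byte on a big-endian target */
    #endif
      memcpy (&x, b, sizeof b);
      return x;
    }
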
@@ -9516,7 +9795,7 @@
{
int i;
- emit_call_insn (GEN_CALL (operands[0], const0_rtx, const0_rtx, const0_rtx));
+ emit_call_insn (gen_call (operands[0], const0_rtx, const0_rtx));
for (i = 0; i < XVECLEN (operands[2], 0); i++)
{
@@ -12134,7 +12413,10 @@
;; Pack/unpack 128-bit floating point types that take 2 scalar registers
; Type of the 64-bit part when packing/unpacking 128-bit floating point types
-(define_mode_attr FP128_64 [(TF "DF") (TD "DI")])
+(define_mode_attr FP128_64 [(TF "DF")
+ (IF "DF")
+ (TD "DI")
+ (KF "DI")])
(define_expand "unpack<mode>"
[(set (match_operand:<FP128_64> 0 "nonimmediate_operand" "")
@@ -12142,7 +12424,7 @@
[(match_operand:FMOVE128 1 "register_operand" "")
(match_operand:QI 2 "const_0_to_1_operand" "")]
UNSPEC_UNPACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"")
(define_insn_and_split "unpack<mode>_dm"
@@ -12151,7 +12433,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d,r,d,r")
(match_operand:QI 2 "const_0_to_1_operand" "i,i,i,i,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12175,7 +12457,7 @@
[(match_operand:FMOVE128 1 "register_operand" "d,d")
(match_operand:QI 2 "const_0_to_1_operand" "i,i")]
UNSPEC_UNPACK_128BIT))]
- "!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE"
+ "(!TARGET_POWERPC64 || !TARGET_DIRECT_MOVE) && FLOAT128_2REG_P (<MODE>mode)"
"#"
"&& reload_completed"
[(set (match_dup 0) (match_dup 3))]
@@ -12199,7 +12481,7 @@
[(match_operand:<FP128_64> 1 "register_operand" "0,d")
(match_operand:<FP128_64> 2 "register_operand" "d,d")]
UNSPEC_PACK_128BIT))]
- ""
+ "FLOAT128_2REG_P (<MODE>mode)"
"@
fmr %L0,%2
#"
@@ -12219,12 +12501,12 @@
[(set_attr "type" "fp,fp")
(set_attr "length" "4,8")])
-(define_insn "unpackv1ti"
+(define_insn "unpack<mode>"
[(set (match_operand:DI 0 "register_operand" "=d,d")
- (unspec:DI [(match_operand:V1TI 1 "register_operand" "0,wa")
+ (unspec:DI [(match_operand:FMOVE128_VSX 1 "register_operand" "0,wa")
(match_operand:QI 2 "const_0_to_1_operand" "O,i")]
UNSPEC_UNPACK_128BIT))]
- "TARGET_VSX"
+ "VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)"
{
if (REGNO (operands[0]) == REGNO (operands[1]) && INTVAL (operands[2]) == 0)
return ASM_COMMENT_START " xxpermdi to same register";
@@ -12232,19 +12514,17 @@
operands[3] = GEN_INT (INTVAL (operands[2]) == 0 ? 0 : 3);
return "xxpermdi %x0,%x1,%x1,%3";
}
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
-(define_insn "packv1ti"
- [(set (match_operand:V1TI 0 "register_operand" "=wa")
- (unspec:V1TI
+(define_insn "pack<mode>"
+ [(set (match_operand:FMOVE128_VSX 0 "register_operand" "=wa")
+ (unspec:FMOVE128_VSX
[(match_operand:DI 1 "register_operand" "d")
(match_operand:DI 2 "register_operand" "d")]
UNSPEC_PACK_128BIT))]
"TARGET_VSX"
"xxpermdi %x0,%x1,%x2,0"
- [(set_attr "type" "vecperm")
- (set_attr "length" "4")])
+ [(set_attr "type" "vecperm")])
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 18ea27a3d90..6d11ff7dfdb 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -212,7 +212,7 @@ Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save
; Allow/disallow the movmisalign in DF/DI vectors
mefficient-unaligned-vector
-Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1)
+Target Undocumented Report Mask(EFFICIENT_UNALIGNED_VSX) Var(rs6000_isa_flags)
; Consider unaligned VSX accesses to be efficient/inefficient
mallow-df-permute
diff --git a/gcc/config/rs6000/sysv4.h b/gcc/config/rs6000/sysv4.h
index 24618e309f1..f48af43e7c5 100644
--- a/gcc/config/rs6000/sysv4.h
+++ b/gcc/config/rs6000/sysv4.h
@@ -574,7 +574,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{R*} \
%(link_shlib) \
%{!T*: %(link_start) } \
-%(link_target) \
%(link_os)"
/* Shared libraries are not default. */
@@ -584,10 +583,6 @@ ENDIAN_SELECT(" -mbig", " -mlittle", DEFAULT_ASM_ENDIAN)
%{shared:-G -dy -z text } \
%{symbolic:-Bsymbolic -G -dy -z text }"
-/* Override the default target of the linker. */
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT("", " --oformat elf32-powerpcle", "")
-
/* Any specific OS flags. */
#define LINK_OS_SPEC "\
%{mads : %(link_os_ads) ; \
@@ -873,7 +868,6 @@ ncrtn.o%s"
{ "endfile_openbsd", ENDFILE_OPENBSD_SPEC }, \
{ "endfile_default", ENDFILE_DEFAULT_SPEC }, \
{ "link_shlib", LINK_SHLIB_SPEC }, \
- { "link_target", LINK_TARGET_SPEC }, \
{ "link_start", LINK_START_SPEC }, \
{ "link_start_ads", LINK_START_ADS_SPEC }, \
{ "link_start_yellowknife", LINK_START_YELLOWKNIFE_SPEC }, \
diff --git a/gcc/config/rs6000/sysv4le.h b/gcc/config/rs6000/sysv4le.h
index 7b1d6a1b4de..66ee7cadfe4 100644
--- a/gcc/config/rs6000/sysv4le.h
+++ b/gcc/config/rs6000/sysv4le.h
@@ -25,10 +25,6 @@
#undef DEFAULT_ASM_ENDIAN
#define DEFAULT_ASM_ENDIAN " -mlittle"
-#undef LINK_TARGET_SPEC
-#define LINK_TARGET_SPEC \
- ENDIAN_SELECT(" --oformat elf32-powerpc", "", "")
-
#undef MULTILIB_DEFAULTS
#define MULTILIB_DEFAULTS { "mlittle", "mcall-sysv" }
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 4a62fbbbdd4..8821dec5989 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -977,6 +977,8 @@
;; General shift amounts can be supported using vsro + vsr. We're
;; not expecting to see these yet (the vectorizer currently
;; generates only shifts by a whole number of vector elements).
+;; Note that the vec_shr operation is actually defined as
+;; 'shift toward element 0' so is a shr for LE and shl for BE.
(define_expand "vec_shr_<mode>"
[(match_operand:VEC_L 0 "vlogical_operand" "")
(match_operand:VEC_L 1 "vlogical_operand" "")
@@ -987,6 +989,7 @@
rtx bitshift = operands[2];
rtx shift;
rtx insn;
+ rtx zero_reg, op1, op2;
HOST_WIDE_INT bitshift_val;
HOST_WIDE_INT byteshift_val;
@@ -996,19 +999,29 @@
if (bitshift_val & 0x7)
FAIL;
byteshift_val = (bitshift_val >> 3);
+ zero_reg = gen_reg_rtx (<MODE>mode);
+ emit_move_insn (zero_reg, CONST0_RTX (<MODE>mode));
if (!BYTES_BIG_ENDIAN)
- byteshift_val = 16 - byteshift_val;
+ {
+ byteshift_val = 16 - byteshift_val;
+ op1 = zero_reg;
+ op2 = operands[1];
+ }
+ else
+ {
+ op1 = operands[1];
+ op2 = zero_reg;
+ }
+
if (TARGET_VSX && (byteshift_val & 0x3) == 0)
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val >> 2);
- insn = gen_vsx_xxsldwi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_vsx_xxsldwi_<mode> (operands[0], op1, op2, shift);
}
else
{
shift = gen_rtx_CONST_INT (QImode, byteshift_val);
- insn = gen_altivec_vsldoi_<mode> (operands[0], operands[1], operands[1],
- shift);
+ insn = gen_altivec_vsldoi_<mode> (operands[0], op1, op2, shift);
}
emit_insn (insn);
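
The vector.md change makes vec_shr_<mode> shift in zeros: the vsldoi/xxsldwi now concatenates a zero vector with the source (in the order required by the endianness) instead of concatenating the source with itself, which rotated stale data into the vacated elements. A byte-level model of the intended "shift toward element 0 by whole bytes, fill with zeros" semantics (sketch only):

    /* Sketch only: intended semantics of vec_shr_<mode> after the change,
       on the 16 bytes of a vector in element order.  Data moves toward
       element 0; the vacated high positions become zero.  */
    static void
    vec_shr_model (unsigned char dst[16], const unsigned char src[16],
                   int nbytes)
    {
      for (int i = 0; i < 16; i++)
        dst[i] = (i + nbytes < 16) ? src[i + nbytes] : 0;
    }
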