author     Caroline Tice <cmtice@chromium.org>  2013-06-21 12:05:49 -0700
committer  Caroline Tice <cmtice@chromium.org>  2013-06-21 12:05:49 -0700
commit     9c1705845241cdea5cf61574218edef282901fbd (patch)
tree       021da661f8a821bc7eea277521fa13f2043879a0 /gcc/config
parent     9bdedf4df4166718fa0a44811c643214c6880471 (diff)
parent     8a46ed36b0449c870d0a938c1010ad610ebcf4f7 (diff)
Merge branch 'master' into vtv
Conflicts:
	gcc/ChangeLog
	gcc/cp/ChangeLog
	gcc/cp/decl2.c
	gcc/varasm.c
	libstdc++-v3/ChangeLog
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h  |    8
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md   |  132
-rw-r--r--  gcc/config/aarch64/aarch64.c         |  346
-rw-r--r--  gcc/config/aarch64/aarch64.md        |   42
-rw-r--r--  gcc/config/aarch64/constraints.md    |    5
-rw-r--r--  gcc/config/aarch64/iterators.md      |    3
-rw-r--r--  gcc/config/alpha/alpha.c             |   15
-rw-r--r--  gcc/config/arm/arm-fixed.md          |   38
-rw-r--r--  gcc/config/arm/arm-ldmstm.ml         |    7
-rw-r--r--  gcc/config/arm/arm.c                 |   57
-rw-r--r--  gcc/config/arm/arm.md                |  161
-rw-r--r--  gcc/config/arm/arm.opt               |    4
-rw-r--r--  gcc/config/arm/constraints.md        |   12
-rw-r--r--  gcc/config/arm/ldmstm.md             |   72
-rw-r--r--  gcc/config/arm/neon.md               |   42
-rw-r--r--  gcc/config/arm/predicates.md         |   33
-rw-r--r--  gcc/config/arm/sync.md               |   30
-rw-r--r--  gcc/config/c6x/c6x.h                 |    2
-rw-r--r--  gcc/config/i386/i386-protos.h        |    2
-rw-r--r--  gcc/config/i386/i386.c               |  394
-rw-r--r--  gcc/config/i386/i386.h               |   16
-rw-r--r--  gcc/config/i386/i386.md              |   26
-rw-r--r--  gcc/config/mips/mips-cpus.def        |    1
-rw-r--r--  gcc/config/mips/mips-dsp.md          |    9
-rw-r--r--  gcc/config/mips/mips-ps-3d.md        |    4
-rw-r--r--  gcc/config/mips/mips-tables.opt      |  288
-rw-r--r--  gcc/config/mips/mips.c               |   24
-rw-r--r--  gcc/config/mips/mips.h               |   33
-rw-r--r--  gcc/config/mips/mips.md              |  326
-rw-r--r--  gcc/config/mips/mips.opt             |    4
-rw-r--r--  gcc/config/mmix/mmix.c               |    2
-rw-r--r--  gcc/config/mmix/mmix.h               |    2
-rw-r--r--  gcc/config/rl78/rl78.c               |   19
-rw-r--r--  gcc/config/rl78/rl78.md              |   70
-rw-r--r--  gcc/config/rs6000/altivec.h          |   16
-rw-r--r--  gcc/config/rs6000/altivec.md         |   66
-rw-r--r--  gcc/config/rs6000/driver-rs6000.c    |    2
-rw-r--r--  gcc/config/rs6000/linux64.h          |    7
-rw-r--r--  gcc/config/rs6000/predicates.md      |   11
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def |   52
-rw-r--r--  gcc/config/rs6000/rs6000-c.c         |  436
-rw-r--r--  gcc/config/rs6000/rs6000-opts.h      |   22
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h    |    3
-rw-r--r--  gcc/config/rs6000/rs6000.c           | 1176
-rw-r--r--  gcc/config/rs6000/rs6000.h           |   13
-rw-r--r--  gcc/config/rs6000/rs6000.md          |  473
-rw-r--r--  gcc/config/rs6000/spe.md             |    8
-rw-r--r--  gcc/config/rs6000/sync.md            |  166
-rw-r--r--  gcc/config/rs6000/t-linux            |    2
-rw-r--r--  gcc/config/rs6000/vector.md          |   56
-rw-r--r--  gcc/config/rs6000/vsx.md             |  543
-rw-r--r--  gcc/config/s390/s390.c               |   34
-rw-r--r--  gcc/config/s390/s390.h               |    2
-rw-r--r--  gcc/config/s390/s390.md              |   13
-rw-r--r--  gcc/config/s390/s390.opt             |    4
55 files changed, 4047 insertions(+), 1287 deletions(-)
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index bdb6b040578..12f3c3a6fe6 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -149,6 +149,8 @@ bool aarch64_legitimate_pic_operand_p (rtx);
bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
enum machine_mode);
+char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
+char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
@@ -157,6 +159,8 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
+bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool,
+ struct simd_immediate_info *);
bool aarch64_symbolic_address_p (rtx);
bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
enum aarch64_symbol_type *);
@@ -222,6 +226,8 @@ void aarch64_split_128bit_move (rtx, rtx);
bool aarch64_split_128bit_move_p (rtx, rtx);
+void aarch64_split_simd_combine (rtx, rtx, rtx);
+
void aarch64_split_simd_move (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
@@ -257,6 +263,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
-
-char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
#endif /* GCC_AARCH64_PROTOS_H */
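
The prototype changes above collapse the old multi-pointer immediate-validity interface into a single out-struct. A minimal standalone sketch of the new calling convention, with an opaque stub for GCC's rtx and field names mirroring the simd_immediate_info struct added in aarch64.c further down:

/* Sketch only: stub types, not the GCC headers.  */
typedef struct rtx_def *rtx;       /* opaque here; real type in GCC's rtl.h */

struct simd_immediate_info
{
  rtx value;           /* constant payload for the MOVI/MVNI */
  int shift;           /* left shift to apply, 0 if none */
  int element_width;   /* lane width in bits */
  int mvn;             /* nonzero: emit MVNI instead of MOVI (bool in GCC) */
};

/* Validity-only callers pass NULL for the info pointer; callers that
   emit code pass a local struct and read the fields back:

     struct simd_immediate_info info;
     if (aarch64_simd_valid_immediate (op, mode, false, &info))
       ...emit using info.value, info.shift, info.element_width, info.mvn...
*/
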
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 04fbdbd5837..02037f3f2cb 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -409,7 +409,7 @@
case 4: return "ins\t%0.d[0], %1";
case 5: return "mov\t%0, %1";
case 6:
- return aarch64_output_simd_mov_immediate (&operands[1],
+ return aarch64_output_simd_mov_immediate (operands[1],
<MODE>mode, 64);
default: gcc_unreachable ();
}
@@ -440,7 +440,7 @@
case 5:
return "#";
case 6:
- return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128);
+ return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
default:
gcc_unreachable ();
}
@@ -1058,9 +1058,9 @@
(vec_duplicate:<VHALF> (const_int 0))))]
"TARGET_SIMD"
"@
- mov\\t%d0, %d1
- fmov\t%d0, %1
- dup\t%d0, %1"
+ dup\\t%d0, %1.d[0]
+ fmov\\t%d0, %1
+ dup\\t%d0, %1"
[(set_attr "v8type" "*,fmov,*")
(set_attr "simd_type" "simd_dup,*,simd_dup")
(set_attr "simd_mode" "<MODE>")
@@ -1190,6 +1190,104 @@
;; Widening arithmetic.
+(define_insn "*aarch64_<su>mlal_lo<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal_hi<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_lo<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))))]
+ "TARGET_SIMD"
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_hi<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))))]
+ "TARGET_SIMD"
+ "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 2 "register_operand" "w")))
+ (match_operand:<VWIDE> 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 2 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 3 "register_operand" "w")))))]
+ "TARGET_SIMD"
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
@@ -2218,15 +2316,29 @@
(set_attr "simd_mode" "<MODE>")]
)
-(define_insn "aarch64_combine<mode>"
+(define_insn_and_split "aarch64_combine<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=&w")
(vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
(match_operand:VDC 2 "register_operand" "w")))]
"TARGET_SIMD"
- "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]"
- [(set_attr "simd_type" "simd_ins")
- (set_attr "simd_mode" "<MODE>")]
-)
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "aarch64_simd_combine<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
+ (match_operand:VDC 2 "register_operand" "w")))]
+ "TARGET_SIMD"
+ {
+ emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
+ emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
+ DONE;
+ })
;; <su><addsub>l<q>.
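
With the change above, the register-to-register combine no longer hard-codes a mov/ins pair; it splits after reload into the two quad-half moves that the new expander also emits. A standalone C sketch of that lowering for the V2SI case (the extern declarations stand in for the generator functions produced from the .md patterns; Vdbl maps V2SI to v4si per the iterators.md hunk below):

typedef struct rtx_def *rtx;                  /* opaque stub, as in GCC */
extern rtx gen_move_lo_quad_v4si (rtx, rtx);  /* from move_lo_quad_<mode> */
extern rtx gen_move_hi_quad_v4si (rtx, rtx);  /* from move_hi_quad_<mode> */
extern rtx emit_insn (rtx);

/* vec_concat:V4SI (lo:V2SI, hi:V2SI) becomes two half-register writes.  */
static void
combine_v2si (rtx dst, rtx lo, rtx hi)
{
  emit_insn (gen_move_lo_quad_v4si (dst, lo));  /* dst.d[0] = lo */
  emit_insn (gen_move_hi_quad_v4si (dst, hi));  /* dst.d[1] = hi */
}
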
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index 9c77888157d..527b00dbcaa 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -87,6 +87,14 @@ struct aarch64_address_info {
enum aarch64_symbol_type symbol_type;
};
+struct simd_immediate_info
+{
+ rtx value;
+ int shift;
+ int element_width;
+ bool mvn;
+};
+
/* The current code model. */
enum aarch64_code_model aarch64_cmodel;
@@ -103,8 +111,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
-static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
- int *, unsigned char *, int *, int *);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
@@ -532,9 +538,7 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
{
rtx tmp_reg = dest;
if (can_create_pseudo_p ())
- {
- tmp_reg = gen_reg_rtx (Pmode);
- }
+ tmp_reg = gen_reg_rtx (Pmode);
emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
return;
@@ -696,6 +700,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src)
|| ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
+/* Split a complex SIMD combine. */
+
+void
+aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+{
+ enum machine_mode src_mode = GET_MODE (src1);
+ enum machine_mode dst_mode = GET_MODE (dst);
+
+ gcc_assert (VECTOR_MODE_P (dst_mode));
+
+ if (REG_P (dst) && REG_P (src1) && REG_P (src2))
+ {
+ rtx (*gen) (rtx, rtx, rtx);
+
+ switch (src_mode)
+ {
+ case V8QImode:
+ gen = gen_aarch64_simd_combinev8qi;
+ break;
+ case V4HImode:
+ gen = gen_aarch64_simd_combinev4hi;
+ break;
+ case V2SImode:
+ gen = gen_aarch64_simd_combinev2si;
+ break;
+ case V2SFmode:
+ gen = gen_aarch64_simd_combinev2sf;
+ break;
+ case DImode:
+ gen = gen_aarch64_simd_combinedi;
+ break;
+ case DFmode:
+ gen = gen_aarch64_simd_combinedf;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (dst, src1, src2));
+ return;
+ }
+}
+
/* Split a complex SIMD move. */
void
@@ -5068,6 +5115,10 @@ aarch64_classify_symbol (rtx x,
return SYMBOL_SMALL_ABSOLUTE;
case AARCH64_CMODEL_TINY_PIC:
+ if (!aarch64_symbol_binds_local_p (x))
+ return SYMBOL_SMALL_GOT;
+ return SYMBOL_TINY_ABSOLUTE;
+
case AARCH64_CMODEL_SMALL_PIC:
if (!aarch64_symbol_binds_local_p (x))
return SYMBOL_SMALL_GOT;
@@ -5150,8 +5201,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
/* This could probably go away because
we now decompose CONST_INTs according to expand_mov_immediate. */
if ((GET_CODE (x) == CONST_VECTOR
- && aarch64_simd_valid_immediate (x, mode, false,
- NULL, NULL, NULL, NULL, NULL) != -1)
+ && aarch64_simd_valid_immediate (x, mode, false, NULL))
|| CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
return !targetm.cannot_force_const_mem (mode, x);
@@ -5982,32 +6032,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode)
return false;
}
-/* Return quad mode as the preferred SIMD mode. */
+/* Return appropriate SIMD container
+ for MODE within a vector of WIDTH bits. */
static enum machine_mode
-aarch64_preferred_simd_mode (enum machine_mode mode)
+aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
+ gcc_assert (width == 64 || width == 128);
if (TARGET_SIMD)
- switch (mode)
- {
- case DFmode:
- return V2DFmode;
- case SFmode:
- return V4SFmode;
- case SImode:
- return V4SImode;
- case HImode:
- return V8HImode;
- case QImode:
- return V16QImode;
- case DImode:
- return V2DImode;
- break;
-
- default:;
- }
+ {
+ if (width == 128)
+ switch (mode)
+ {
+ case DFmode:
+ return V2DFmode;
+ case SFmode:
+ return V4SFmode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+ case DImode:
+ return V2DImode;
+ default:
+ break;
+ }
+ else
+ switch (mode)
+ {
+ case SFmode:
+ return V2SFmode;
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+ default:
+ break;
+ }
+ }
return word_mode;
}
+/* Return 128-bit container as the preferred SIMD mode for MODE. */
+static enum machine_mode
+aarch64_preferred_simd_mode (enum machine_mode mode)
+{
+ return aarch64_simd_container_mode (mode, 128);
+}
+
/* Return the bitmask of possible vector sizes for the vectorizer
to iterate over. */
static unsigned int
@@ -6095,7 +6170,7 @@ aarch64_mangle_type (const_tree type)
}
/* Return the equivalent letter for size. */
-static unsigned char
+static char
sizetochar (int size)
{
switch (size)
@@ -6142,15 +6217,10 @@ aarch64_vect_float_const_representable_p (rtx x)
return aarch64_float_const_representable_p (x0);
}
-/* TODO: This function returns values similar to those
- returned by neon_valid_immediate in gcc/config/arm/arm.c
- but the API here is different enough that these magic numbers
- are not used. It should be sufficient to return true or false. */
-static int
-aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
- rtx *modconst, int *elementwidth,
- unsigned char *elementchar,
- int *mvn, int *shift)
+/* Return true for valid and false for invalid. */
+bool
+aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
+ struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
matches = 1; \
@@ -6161,7 +6231,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
{ \
immtype = (CLASS); \
elsize = (ELSIZE); \
- elchar = sizetochar (elsize); \
eshift = (SHIFT); \
emvn = (NEG); \
break; \
@@ -6170,36 +6239,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
unsigned char bytes[16];
- unsigned char elchar = 0;
int immtype = -1, matches;
unsigned int invmask = inverse ? 0xff : 0;
int eshift, emvn;
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
{
- bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
- int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
-
- if (!(simd_imm_zero
- || aarch64_vect_float_const_representable_p (op)))
- return -1;
-
- if (modconst)
- *modconst = CONST_VECTOR_ELT (op, 0);
-
- if (elementwidth)
- *elementwidth = elem_width;
-
- if (elementchar)
- *elementchar = sizetochar (elem_width);
+ if (! (aarch64_simd_imm_zero_p (op, mode)
+ || aarch64_vect_float_const_representable_p (op)))
+ return false;
- if (shift)
- *shift = 0;
+ if (info)
+ {
+ info->value = CONST_VECTOR_ELT (op, 0);
+ info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
+ info->mvn = false;
+ info->shift = 0;
+ }
- if (simd_imm_zero)
- return 19;
- else
- return 18;
+ return true;
}
/* Splat vector constant out into a byte vector. */
@@ -6297,23 +6355,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
if (immtype == -1
|| (immtype >= 12 && immtype <= 15)
|| immtype == 18)
- return -1;
-
-
- if (elementwidth)
- *elementwidth = elsize;
-
- if (elementchar)
- *elementchar = elchar;
-
- if (mvn)
- *mvn = emvn;
-
- if (shift)
- *shift = eshift;
+ return false;
- if (modconst)
+ if (info)
{
+ info->element_width = elsize;
+ info->mvn = emvn != 0;
+ info->shift = eshift;
+
unsigned HOST_WIDE_INT imm = 0;
/* Un-invert bytes of recognized vector, if necessary. */
@@ -6330,68 +6379,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
<< (i * BITS_PER_UNIT);
- *modconst = GEN_INT (imm);
- }
- else
- {
- unsigned HOST_WIDE_INT imm = 0;
- for (i = 0; i < elsize / BITS_PER_UNIT; i++)
- imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+ info->value = GEN_INT (imm);
+ }
+ else
+ {
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
/* Construct 'abcdefgh' because the assembler cannot handle
- generic constants. */
- gcc_assert (shift != NULL && mvn != NULL);
- if (*mvn)
+ generic constants. */
+ if (info->mvn)
imm = ~imm;
- imm = (imm >> *shift) & 0xff;
- *modconst = GEN_INT (imm);
- }
+ imm = (imm >> info->shift) & 0xff;
+ info->value = GEN_INT (imm);
+ }
}
- return immtype;
+ return true;
#undef CHECK
}
-/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
- (or, implicitly, MVNI) immediate. Write back width per element
- to *ELEMENTWIDTH, and a modified constant (whatever should be output
- for a MOVI instruction) in *MODCONST. */
-int
-aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
- rtx *modconst, int *elementwidth,
- unsigned char *elementchar,
- int *mvn, int *shift)
-{
- rtx tmpconst;
- int tmpwidth;
- unsigned char tmpwidthc;
- int tmpmvn = 0, tmpshift = 0;
- int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
- &tmpwidth, &tmpwidthc,
- &tmpmvn, &tmpshift);
-
- if (retval == -1)
- return 0;
-
- if (modconst)
- *modconst = tmpconst;
-
- if (elementwidth)
- *elementwidth = tmpwidth;
-
- if (elementchar)
- *elementchar = tmpwidthc;
-
- if (mvn)
- *mvn = tmpmvn;
-
- if (shift)
- *shift = tmpshift;
-
- return 1;
-}
-
static bool
aarch64_const_vec_all_same_int_p (rtx x,
HOST_WIDE_INT minval,
@@ -6496,9 +6504,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
gcc_assert (!VECTOR_MODE_P (mode));
vmode = aarch64_preferred_simd_mode (mode);
rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
- int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
- NULL, NULL, NULL, NULL);
- return retval;
+ return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector. */
@@ -6726,8 +6732,7 @@ aarch64_simd_make_constant (rtx vals)
gcc_unreachable ();
if (const_vec != NULL_RTX
- && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
- NULL, NULL, NULL))
+ && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
/* Load using MOVI/MVNI. */
return const_vec;
else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
@@ -7285,49 +7290,78 @@ aarch64_float_const_representable_p (rtx x)
}
char*
-aarch64_output_simd_mov_immediate (rtx *const_vector,
+aarch64_output_simd_mov_immediate (rtx const_vector,
enum machine_mode mode,
unsigned width)
{
- int is_valid;
- unsigned char widthc;
- int lane_width_bits;
+ bool is_valid;
static char templ[40];
- int shift = 0, mvn = 0;
const char *mnemonic;
unsigned int lane_count = 0;
+ char element_char;
- is_valid =
- aarch64_simd_immediate_valid_for_move (*const_vector, mode,
- const_vector, &lane_width_bits,
- &widthc, &mvn, &shift);
+ struct simd_immediate_info info;
+
+ /* This will return true to show const_vector is legal for use as either
+ an AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
+ also update INFO to show how the immediate should be generated. */
+ is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
gcc_assert (is_valid);
+ element_char = sizetochar (info.element_width);
+ lane_count = width / info.element_width;
+
mode = GET_MODE_INNER (mode);
if (mode == SFmode || mode == DFmode)
{
- bool zero_p =
- aarch64_float_const_zero_rtx_p (*const_vector);
- gcc_assert (shift == 0);
- mnemonic = zero_p ? "movi" : "fmov";
+ gcc_assert (info.shift == 0 && ! info.mvn);
+ if (aarch64_float_const_zero_rtx_p (info.value))
+ info.value = GEN_INT (0);
+ else
+ {
+#define buf_size 20
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
+ char float_buf[buf_size] = {'\0'};
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
+#undef buf_size
+
+ if (lane_count == 1)
+ snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
+ else
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
+ lane_count, element_char, float_buf);
+ return templ;
+ }
}
- else
- mnemonic = mvn ? "mvni" : "movi";
- gcc_assert (lane_width_bits != 0);
- lane_count = width / lane_width_bits;
+ mnemonic = info.mvn ? "mvni" : "movi";
if (lane_count == 1)
- snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
- else if (shift)
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
- mnemonic, lane_count, widthc, shift);
+ snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+ mnemonic, UINTVAL (info.value));
+ else if (info.shift)
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
+ ", lsl %d", mnemonic, lane_count, element_char,
+ UINTVAL (info.value), info.shift);
else
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
- mnemonic, lane_count, widthc);
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
+ mnemonic, lane_count, element_char, UINTVAL (info.value));
return templ;
}
+char*
+aarch64_output_scalar_simd_mov_immediate (rtx immediate,
+ enum machine_mode mode)
+{
+ enum machine_mode vmode;
+
+ gcc_assert (!VECTOR_MODE_P (mode));
+ vmode = aarch64_simd_container_mode (mode, 64);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
+ return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+}
+
/* Split operands into moves from op[1] + op[2] into op[0]. */
void
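
One effect of the aarch64.c rework above: aarch64_output_simd_mov_immediate now prints the immediate into the template as a literal hex constant (via HOST_WIDE_INT_PRINT_HEX) instead of referencing operand %1, and the new aarch64_output_scalar_simd_mov_immediate reuses it by duplicating a scalar into a 64-bit vector. A compilable sketch of the template construction, with 0x%llx standing in for the host-wide-int format macro:

#include <stdio.h>

static char templ[40];   /* static buffer, as in the GCC function */

static const char *
movi_template (const char *mnemonic, int lane_count, char element_char,
               unsigned long long value, int shift)
{
  if (lane_count == 1)
    snprintf (templ, sizeof (templ), "%s\t%%d0, 0x%llx", mnemonic, value);
  else if (shift)
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, 0x%llx, lsl %d",
              mnemonic, lane_count, element_char, value, shift);
  else
    snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, 0x%llx",
              mnemonic, lane_count, element_char, value);
  return templ;   /* e.g. movi_template ("mvni", 4, 'h', 0x12, 8) */
}
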
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index 89db09254eb..e88e5be894e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -774,17 +774,34 @@
(match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
- "@
- mov\\t%w0, %w1
- mov\\t%w0, %1
- movi\\t%0.<Vallxd>, %1
- ldr<size>\\t%w0, %1
- ldr\\t%<size>0, %1
- str<size>\\t%w1, %0
- str\\t%<size>1, %0
- umov\\t%w0, %1.<v>[0]
- dup\\t%0.<Vallxd>, %w1
- dup\\t%0, %1.<v>[0]"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov\t%w0, %w1";
+ case 1:
+ return "mov\t%w0, %1";
+ case 2:
+ return aarch64_output_scalar_simd_mov_immediate (operands[1],
+ <MODE>mode);
+ case 3:
+ return "ldr<size>\t%w0, %1";
+ case 4:
+ return "ldr\t%<size>0, %1";
+ case 5:
+ return "str<size>\t%w1, %0";
+ case 6:
+ return "str\t%<size>1, %0";
+ case 7:
+ return "umov\t%w0, %1.<v>[0]";
+ case 8:
+ return "dup\t%0.<Vallxd>, %w1";
+ case 9:
+ return "dup\t%0, %1.<v>[0]";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
(set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
(set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
@@ -850,7 +867,8 @@
movi\\t%d0, %1"
[(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
(set_attr "mode" "DI")
- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")]
+ (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index d9c18e692ea..7cafc08fdd9 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -143,9 +143,8 @@
"@internal
A constraint that matches vector of immediates."
(and (match_code "const_vector")
- (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op),
- NULL, NULL, NULL,
- NULL, NULL) != 0")))
+ (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
+ false, NULL)")))
(define_constraint "Dh"
"@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 860d4d9a187..8e40c5de5d4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -385,7 +385,8 @@
;; Double modes of vector modes (lower case).
(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
(V2SI "v4si") (V2SF "v4sf")
- (SI "v2si") (DI "v2di")])
+ (SI "v2si") (DI "v2di")
+ (DF "v2df")])
;; Narrowed modes for VDN.
(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 096ef3b1f56..5f5b33e347b 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
break;
case GE: case GT: case GEU: case GTU:
- /* These must be swapped. */
- if (op1 != CONST0_RTX (cmp_mode))
- {
- code = swap_condition (code);
- tem = op0, op0 = op1, op1 = tem;
- }
+ /* These normally need swapping, but for integer zero we have
+ special patterns that recognize swapped operands. */
+ if (cmp_mode == DImode && op1 == const0_rtx)
+ break;
+ code = swap_condition (code);
+ tem = op0, op0 = op1, op1 = tem;
break;
default:
@@ -3068,7 +3068,8 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
out = gen_reg_rtx (DImode);
/* What's actually returned is -1,0,1, not a proper boolean value. */
- note = gen_rtx_UNSPEC (DImode, gen_rtvec (2, op0, op1), UNSPEC_XFLT_COMPARE);
+ note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
+ note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
alpha_emit_xfloating_libcall (func, out, operands, 2, note);
return out;
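
Background for the alpha.c hunk above: Alpha provides compares only in the LE/LT (and unsigned) forms, so GE/GT/GEU/GTU comparisons are canonicalized by swapping operands; the new early break keeps DImode compares against integer zero unswapped because dedicated patterns now recognize the swapped-operand shape directly. A sketch of what GCC's swap_condition does to these four codes:

/* Hypothetical restatement; the real swap_condition covers every code.  */
enum cmp_code { GE, GT, GEU, GTU, LE, LT, LEU, LTU };

static enum cmp_code
swapped (enum cmp_code code)
{
  switch (code)
    {
    case GE:  return LE;    /* a >= b  <=>  b <= a */
    case GT:  return LT;
    case GEU: return LEU;
    case GTU: return LTU;
    default:  return code;
    }
}
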
diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md
index 10da396ab66..12bbbaf9083 100644
--- a/gcc/config/arm/arm-fixed.md
+++ b/gcc/config/arm/arm-fixed.md
@@ -19,12 +19,13 @@
;; This file contains ARM instructions that support fixed-point operations.
(define_insn "add<mode>3"
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
- (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
- (match_operand:FIXED 2 "s_register_operand" "r")))]
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+ (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"add%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")])
(define_insn "add<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -32,7 +33,8 @@
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"sadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "usadd<mode>3"
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -40,7 +42,8 @@
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"uqadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "ssadd<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -48,15 +51,17 @@
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"qadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "sub<mode>3"
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
- (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
- (match_operand:FIXED 2 "s_register_operand" "r")))]
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+ (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"sub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")])
(define_insn "sub<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -64,7 +69,8 @@
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"ssub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "ussub<mode>3"
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -73,7 +79,8 @@
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"uqsub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "sssub<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -81,7 +88,8 @@
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"qsub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
;; Fractional multiplies.
@@ -374,6 +382,7 @@
"TARGET_32BIT && arm_arch6"
"ssat%?\\t%0, #16, %2%S1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "insn" "sat")
(set_attr "shift" "1")
(set_attr "type" "alu_shift")])
@@ -384,4 +393,5 @@
"TARGET_INT_SIMD"
"usat%?\\t%0, #16, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "insn" "sat")])
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
index 2bc9702bee2..e615437b125 100644
--- a/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -146,12 +146,15 @@ let can_thumb addrmode update is_store =
| IA, true, true -> true
| _ -> false
+exception InvalidAddrMode of string;;
+
let target addrmode thumb =
match addrmode, thumb with
IA, true -> "TARGET_THUMB1"
| IA, false -> "TARGET_32BIT"
| DB, false -> "TARGET_32BIT"
| _, false -> "TARGET_ARM"
+ | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.")
let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
let astr = string_of_addrmode addrmode in
@@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
done;
Printf.printf "}\"\n";
Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs;
- begin if not thumb then
+ if not thumb then begin
Printf.printf "\n (set_attr \"predicable\" \"yes\")";
+ if addrmode == IA || addrmode == DB then
+ Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")";
end;
Printf.printf "])\n\n"
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 55a512349cc..6fc307e7709 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -662,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
+#undef MAX_INSN_PER_IT_BLOCK
+#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -1871,6 +1875,11 @@ arm_option_override (void)
arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
+ if (arm_restrict_it == 2)
+ arm_restrict_it = arm_arch8 && TARGET_THUMB2;
+
+ if (!TARGET_THUMB2)
+ arm_restrict_it = 0;
/* If we are not using the default (ARM mode) section anchor offset
ranges, then set the correct ranges now. */
@@ -2677,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
case AND:
+ case IOR:
+ case XOR:
return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
&& (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
case PLUS:
@@ -16168,25 +16179,34 @@ arm_compute_save_reg0_reg12_mask (void)
return save_reg_mask;
}
+/* Return true if r3 is live at the start of the function. */
+
+static bool
+arm_r3_live_at_start_p (void)
+{
+ /* Just look at cfg info, which is still close enough to correct at this
+ point. This gives false positives for broken functions that might use
+ uninitialized data that happens to be allocated in r3, but who cares? */
+ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3);
+}
/* Compute the number of bytes used to store the static chain register on the
- stack, above the stack frame. We need to know this accurately to get the
- alignment of the rest of the stack frame correct. */
+ stack, above the stack frame. We need to know this accurately to get the
+ alignment of the rest of the stack frame correct. */
-static int arm_compute_static_chain_stack_bytes (void)
+static int
+arm_compute_static_chain_stack_bytes (void)
{
- unsigned long func_type = arm_current_func_type ();
- int static_chain_stack_bytes = 0;
-
- if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
- IS_NESTED (func_type) &&
- df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
- static_chain_stack_bytes = 4;
+ /* See the defining assertion in arm_expand_prologue. */
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
+ && IS_NESTED (arm_current_func_type ())
+ && arm_r3_live_at_start_p ()
+ && crtl->args.pretend_args_size == 0)
+ return 4;
- return static_chain_stack_bytes;
+ return 0;
}
-
/* Compute a bit mask of which registers need to be
saved on the stack for the current function.
This is used by arm_get_frame_offsets, which may add extra registers. */
@@ -18141,16 +18161,16 @@ arm_expand_prologue (void)
}
else if (IS_NESTED (func_type))
{
- /* The Static chain register is the same as the IP register
+ /* The static chain register is the same as the IP register
used as a scratch register during stack frame creation.
To get around this need to find somewhere to store IP
whilst the frame is being created. We try the following
places in order:
- 1. The last argument register.
+ 1. The last argument register r3.
2. A slot on the stack above the frame. (This only
works if the function is not a varargs function).
- 3. Register r3, after pushing the argument registers
+ 3. Register r3 again, after pushing the argument registers
onto the stack.
Note - we only need to tell the dwarf2 backend about the SP
@@ -18158,7 +18178,7 @@ arm_expand_prologue (void)
doesn't need to be unwound, as it doesn't contain a value
inherited from the caller. */
- if (df_regs_ever_live_p (3) == false)
+ if (!arm_r3_live_at_start_p ())
insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
else if (args_to_push == 0)
{
@@ -18299,8 +18319,7 @@ arm_expand_prologue (void)
if (IS_NESTED (func_type))
{
/* Recover the static chain register. */
- if (!df_regs_ever_live_p (3)
- || saved_pretend_args)
+ if (!arm_r3_live_at_start_p () || saved_pretend_args)
insn = gen_rtx_REG (SImode, 3);
else /* if (crtl->args.pretend_args_size == 0) */
{
@@ -19592,7 +19611,7 @@ thumb2_final_prescan_insn (rtx insn)
break;
/* Allow up to 4 conditionally executed instructions in a block. */
n = get_attr_ce_count (insn);
- if (arm_condexec_masklen + n > 4)
+ if (arm_condexec_masklen + n > MAX_INSN_PER_IT_BLOCK)
break;
predicate = COND_EXEC_TEST (body);
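
The arm.c hunks above wire up -mrestrict-it: arm_restrict_it defaults on for ARMv8 Thumb-2 (the Init(2) sentinel in arm.opt below), MAX_INSN_PER_IT_BLOCK drops from 4 to 1 when the option is active, and thumb2_final_prescan_insn now closes a conditional block at that limit. A self-contained sketch of the check:

/* Sketch; in GCC this is the macro near the top of arm.c plus the
   arm_condexec_masklen test in thumb2_final_prescan_insn.  */
#define MAX_INSN_PER_IT_BLOCK(restrict_it) ((restrict_it) ? 1 : 4)

static int
insn_fits_it_block (int masklen_so_far, int ce_count, int restrict_it)
{
  /* Reject the insn when adding its conditionally executed count would
     overflow the current IT block.  */
  return masklen_so_far + ce_count <= MAX_INSN_PER_IT_BLOCK (restrict_it);
}
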
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index b7db3616cdf..3f0e021f3ed 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -93,6 +93,15 @@
; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code.
(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code")))
+; We use this attribute to disable alternatives that can produce 32-bit
+; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks
+; that contain 32-bit instructions.
+(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes"))
+
+; This attribute is used to disable a predicated alternative when we have
+; arm_restrict_it.
+(define_attr "predicable_short_it" "no,yes" (const_string "yes"))
+
;; Operand number of an input operand that is shifted. Zero if the
;; given instruction does not shift one of its input operands.
(define_attr "shift" "" (const_int 0))
@@ -103,6 +112,8 @@
(define_attr "fpu" "none,vfp"
(const (symbol_ref "arm_fpu_attr")))
+(define_attr "predicated" "yes,no" (const_string "no"))
+
; LENGTH of an instruction (in bytes)
(define_attr "length" ""
(const_int 4))
@@ -190,6 +201,15 @@
(cond [(eq_attr "insn_enabled" "no")
(const_string "no")
+ (and (eq_attr "predicable_short_it" "no")
+ (and (eq_attr "predicated" "yes")
+ (match_test "arm_restrict_it")))
+ (const_string "no")
+
+ (and (eq_attr "enabled_for_depr_it" "no")
+ (match_test "arm_restrict_it"))
+ (const_string "no")
+
(eq_attr "arch_enabled" "no")
(const_string "no")
@@ -2163,29 +2183,28 @@
)
(define_insn_and_split "*anddi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
- (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0")
- (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w")
+ (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0")
+ (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))]
"TARGET_32BIT && !TARGET_IWMMXT"
{
switch (which_alternative)
{
- case 0:
- case 1:
+ case 0: /* fall through */
+ case 6: return "vand\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 7: return neon_output_logic_immediate ("vand", &operands[2],
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
case 2:
- case 3: /* fall through */
- return "#";
- case 4: /* fall through */
- case 8: return "vand\t%P0, %P1, %P2";
+ case 3:
+ case 4:
case 5: /* fall through */
- case 9: return neon_output_logic_immediate ("vand", &operands[2],
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
- case 6: return "#";
- case 7: return "#";
+ return "#";
default: gcc_unreachable ();
}
}
- "TARGET_32BIT && !TARGET_IWMMXT"
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 5) (match_dup 6))]
"
@@ -2201,19 +2220,11 @@
gen_highpart_mode (SImode, DImode, operands[2]));
}"
- [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
- (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,
avoid_neon_for_64bits,avoid_neon_for_64bits")
- (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
- (set (attr "insn_enabled") (if_then_else
- (lt (symbol_ref "which_alternative")
- (const_int 4))
- (if_then_else (match_test "!TARGET_NEON")
- (const_string "yes")
- (const_string "no"))
- (if_then_else (match_test "TARGET_NEON")
- (const_string "yes")
- (const_string "no"))))]
+ (set_attr "length" "*,*,8,8,8,8,*,*")
+ ]
)
(define_insn_and_split "*anddi_zesidi_di"
@@ -2997,14 +3008,47 @@
""
)
-(define_insn "*iordi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
- (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r")
- (match_operand:DI 2 "s_register_operand" "r,r")))]
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
- "#"
- [(set_attr "length" "8")
- (set_attr "predicable" "yes")]
+(define_insn_and_split "*iordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0")
+ (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))]
+ "TARGET_32BIT && !TARGET_IWMMXT"
+ {
+ switch (which_alternative)
+ {
+ case 0: /* fall through */
+ case 6: return "vorr\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 7: return neon_output_logic_immediate ("vorr", &operands[2],
+ DImode, 0, VALID_NEON_QREG_MODE (DImode));
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ return "#";
+ default: gcc_unreachable ();
+ }
+ }
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+ "
+ {
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+
+ operands[4] = simplify_gen_binary (IOR, SImode,
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2]));
+ operands[6] = simplify_gen_binary (IOR, SImode,
+ gen_highpart (SImode, operands[1]),
+ gen_highpart_mode (SImode, DImode, operands[2]));
+
+ }"
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+ (set_attr "length" "*,*,8,8,8,8,*,*")
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
(define_insn "*iordi_zesidi_di"
@@ -3137,19 +3181,49 @@
(define_expand "xordi3"
[(set (match_operand:DI 0 "s_register_operand" "")
(xor:DI (match_operand:DI 1 "s_register_operand" "")
- (match_operand:DI 2 "s_register_operand" "")))]
+ (match_operand:DI 2 "arm_xordi_operand" "")))]
"TARGET_32BIT"
""
)
-(define_insn "*xordi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
- (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r")
- (match_operand:DI 2 "s_register_operand" "r,r")))]
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
- "#"
- [(set_attr "length" "8")
- (set_attr "predicable" "yes")]
+(define_insn_and_split "*xordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w")
+ (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))]
+ "TARGET_32BIT && !TARGET_IWMMXT"
+{
+ switch (which_alternative)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4: /* fall through */
+ return "#";
+ case 0: /* fall through */
+ case 5: return "veor\t%P0, %P1, %P2";
+ default: gcc_unreachable ();
+ }
+}
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+ "
+ {
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+
+ operands[4] = simplify_gen_binary (XOR, SImode,
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2]));
+ operands[6] = simplify_gen_binary (XOR, SImode,
+ gen_highpart (SImode, operands[1]),
+ gen_highpart_mode (SImode, DImode, operands[2]));
+
+ }"
+ [(set_attr "length" "*,8,8,8,8,*")
+ (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1")
+ (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")]
)
(define_insn "*xordi_zesidi_di"
@@ -12097,6 +12171,7 @@
(const_int 0)])]
"TARGET_32BIT"
""
+[(set_attr "predicated" "yes")]
)
(define_insn "force_register_use"
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index afb42421c06..b9ae2b09682 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -239,6 +239,10 @@ mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
+mrestrict-it
+Target Report Var(arm_restrict_it) Init(2)
+Generate IT blocks appropriate for ARMv8.
+
mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7e7b3e69e0a..7cd8e31c97f 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -260,6 +260,18 @@
(and (match_code "const_int")
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
+(define_constraint "Df"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn iordi."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)")))
+
+(define_constraint "Dg"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn xordi."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)")))
+
(define_constraint "Di"
"@internal
In ARM/Thumb-2 state a const_int or const_double where both the high
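
The new Df/Dg constraints accept exactly what const_ok_for_dimode_op allows, now that its switch covers IOR and XOR (see the arm.c hunk above). A standalone sketch of the half-word test, with const_ok_for_op_stub standing in for GCC's per-operation immediate check:

extern int const_ok_for_op_stub (unsigned int val);  /* encodable immediate? */

/* A DImode constant qualifies when each 32-bit half is either directly
   encodable for the operation or is all-ones, a shape the splitter can
   handle specially.  */
static int
const_ok_for_dimode_op_sketch (unsigned long long i)
{
  unsigned int hi = (unsigned int) (i >> 32);
  unsigned int lo = (unsigned int) i;
  return (const_ok_for_op_stub (hi) || hi == 0xFFFFFFFFu)
         && (const_ok_for_op_stub (lo) || lo == 0xFFFFFFFFu);
}
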
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 8ebdfc81761..ad137d492e4 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -37,7 +37,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(ia%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -74,7 +75,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm4_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -108,7 +110,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -125,7 +128,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm4_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -302,7 +306,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(db%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm4_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -323,7 +328,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"ldm%(db%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_db"
[(match_parallel 0 "store_multiple_operation"
@@ -338,7 +344,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -355,7 +362,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(db%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
@@ -477,7 +485,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(ia%)\t%4, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm3_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -508,7 +517,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm3_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -537,7 +547,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -552,7 +563,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm3_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -704,7 +716,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(db%)\t%4, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm3_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -722,7 +735,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(db%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_db"
[(match_parallel 0 "store_multiple_operation"
@@ -735,7 +749,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -750,7 +765,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
@@ -855,7 +871,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"ldm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm2_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -880,7 +897,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm2_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -904,7 +922,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -917,7 +936,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm2_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -1044,7 +1064,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"ldm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm2_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -1059,7 +1080,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(db%)\t%3!, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_db"
[(match_parallel 0 "store_multiple_operation"
@@ -1070,7 +1092,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -1083,7 +1106,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index f91a6f7d08b..e814df0d264 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -679,29 +679,6 @@
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "iordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
- (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
- (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))]
- "TARGET_NEON"
-{
- switch (which_alternative)
- {
- case 0: /* fall through */
- case 4: return "vorr\t%P0, %P1, %P2";
- case 1: /* fall through */
- case 5: return neon_output_logic_immediate ("vorr", &operands[2],
- DImode, 0, VALID_NEON_QREG_MODE (DImode));
- case 2: return "#";
- case 3: return "#";
- default: gcc_unreachable ();
- }
-}
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
- (set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
-)
-
;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
@@ -805,21 +782,6 @@
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "xordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
- (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w")
- (match_operand:DI 2 "s_register_operand" "w,r,r,w")))]
- "TARGET_NEON"
- "@
- veor\t%P0, %P1, %P2
- #
- #
- veor\t%P0, %P1, %P2"
- [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
- (set_attr "length" "*,8,8,*")
- (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
-)
-
(define_insn "one_cmpl<mode>2"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
@@ -5617,7 +5579,7 @@
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_NEON"
{
- emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
@@ -5628,7 +5590,7 @@
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_NEON"
{
- emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92de9fe8bd9..d169cb27035 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -42,6 +42,17 @@
(ior (match_operand 0 "imm_for_neon_inv_logic_operand")
(match_operand 0 "s_register_operand")))
+(define_predicate "imm_for_neon_logic_operand"
+ (match_code "const_vector")
+{
+ return (TARGET_NEON
+ && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
+})
+
+(define_predicate "neon_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_logic_operand")
+ (match_operand 0 "s_register_operand")))
+
;; Any hard register.
(define_predicate "arm_hard_register_operand"
(match_code "reg")
@@ -162,6 +173,17 @@
(match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
(match_operand 0 "neon_inv_logic_op2")))
+(define_predicate "arm_iordi_operand_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_int")
+ (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)"))
+ (match_operand 0 "neon_logic_op2")))
+
+(define_predicate "arm_xordi_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_int")
+ (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)"))))
+
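For reference, a minimal sketch of an expander that could consume the new predicate (illustrative only, not part of this hunk; const_ok_for_dimode_op is assumed to check that each 32-bit half of the constant is a valid immediate for the underlying 32-bit operation):

    ;; Hypothetical DImode XOR expander: operand 2 may now be a
    ;; suitable immediate instead of being forced into a register.
    (define_expand "xordi3"
      [(set (match_operand:DI 0 "s_register_operand" "")
            (xor:DI (match_operand:DI 1 "s_register_operand" "")
                    (match_operand:DI 2 "arm_xordi_operand" "")))]
      "TARGET_32BIT"
      ""
    )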
(define_predicate "arm_adddi_operand"
(ior (match_operand 0 "s_register_operand")
(and (match_code "const_int")
@@ -535,17 +557,6 @@
(ior (match_operand 0 "s_register_operand")
(match_operand 0 "imm_for_neon_rshift_operand")))
-(define_predicate "imm_for_neon_logic_operand"
- (match_code "const_vector")
-{
- return (TARGET_NEON
- && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
-})
-
-(define_predicate "neon_logic_op2"
- (ior (match_operand 0 "imm_for_neon_logic_operand")
- (match_operand 0 "s_register_operand")))
-
;; Predicates for named expanders that overlap multiple ISAs.
(define_predicate "cmpdi_operand"
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 980234836c9..8f7bd71c317 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -124,7 +124,8 @@
UNSPEC_LL))]
"TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN"
"ldrexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "s_register_operand" "") ;; bool out
@@ -361,7 +362,8 @@
VUNSPEC_LL)))]
"TARGET_HAVE_LDREXBH"
"ldrex<sync_sfx>%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -371,7 +373,8 @@
VUNSPEC_LAX)))]
"TARGET_HAVE_LDACQ"
"ldaex<sync_sfx>%?\\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_exclusivesi"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -380,7 +383,8 @@
VUNSPEC_LL))]
"TARGET_HAVE_LDREX"
"ldrex%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusivesi"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -389,7 +393,8 @@
VUNSPEC_LAX))]
"TARGET_HAVE_LDACQ"
"ldaex%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_exclusivedi"
[(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -398,7 +403,8 @@
VUNSPEC_LL))]
"TARGET_HAVE_LDREXD"
"ldrexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusivedi"
[(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -407,7 +413,8 @@
VUNSPEC_LAX))]
"TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
"ldaexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -431,7 +438,8 @@
}
return "strex<sync_sfx>%?\t%0, %2, %C1";
}
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_release_exclusivedi"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -448,7 +456,8 @@
operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
return "stlexd%?\t%0, %2, %3, %C1";
}
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_release_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -459,4 +468,5 @@
VUNSPEC_SLX))]
"TARGET_HAVE_LDACQ"
"stlex<sync_sfx>%?\t%0, %2, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index ce331cbe363..c30a9718e76 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch;
Really only externally visible arrays must be aligned this way, as
only those are directly visible from another compilation unit. But
we don't have that information available here. */
-#define DATA_ALIGNMENT(TYPE, ALIGN) \
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
(((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \
? BITS_PER_UNIT * 8 : (ALIGN))
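The rename is mechanical, but the distinction matters: DATA_ALIGNMENT is an optimization-only hint, whereas DATA_ABI_ALIGNMENT is a guarantee other translation units may rely on. A sketch of how a consumer might combine the two hooks (hypothetical helper, assumed semantics):

    /* Hypothetical sketch: the ABI alignment is always applied, and
       the optimization-only hook may raise it further.  */
    static unsigned int
    compute_data_alignment (tree type, unsigned int align, bool opt_p)
    {
    #ifdef DATA_ABI_ALIGNMENT
      align = DATA_ABI_ALIGNMENT (type, align);
    #endif
    #ifdef DATA_ALIGNMENT
      if (opt_p)
        align = DATA_ALIGNMENT (type, align);
    #endif
      return align;
    }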
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ef4dc761d5a..f228e87a2e4 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -207,7 +207,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* RTX_CODE */
#ifdef TREE_CODE
-extern int ix86_data_alignment (tree, int);
+extern int ix86_data_alignment (tree, int, bool);
extern unsigned int ix86_local_alignment (tree, enum machine_mode,
unsigned int);
extern unsigned int ix86_minimum_alignment (tree, enum machine_mode,
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index e8f47c9d417..45e88996ad2 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -2108,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */
- m_ATOM
+ m_ATOM,
+
+ /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split the memory
+ operand of FP converts into a separate load to the destination
+ register. */
+ m_SLM
+
};
/* Feature tests against the various architecture variations. */
@@ -17392,10 +17397,24 @@ distance_agu_use (unsigned int regno0, rtx insn)
static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, int split_cost)
+ unsigned int regno2, int split_cost, bool has_scale)
{
int dist_define, dist_use;
+ /* On Silvermont, a two- or three-source LEA is justified either
+ because it leaves its sources intact (non-destructive destination)
+ or because the SCALE factor is needed. */
+ if (ix86_tune == PROCESSOR_SLM)
+ {
+ if (has_scale)
+ return true;
+ if (split_cost < 1)
+ return false;
+ if (regno0 == regno1 || regno0 == regno2)
+ return false;
+ return true;
+ }
+
dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);
@@ -17484,7 +17503,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[])
if (regno0 == regno1 || regno0 == regno2)
return false;
else
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
@@ -17506,7 +17525,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[])
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
@@ -17585,7 +17604,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
split_cost -= 1;
}
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
+ parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
@@ -17770,7 +17790,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
@@ -24368,6 +24388,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn)
return false;
}
+/* Helper function for exact_store_load_dependency.
+ Return true if ADDR is found anywhere within INSN. */
+static bool
+exact_dependency_1 (rtx addr, rtx insn)
+{
+ enum rtx_code code;
+ const char *format_ptr;
+ int i, j;
+
+ code = GET_CODE (insn);
+ switch (code)
+ {
+ case MEM:
+ if (rtx_equal_p (addr, insn))
+ return true;
+ break;
+ case REG:
+ CASE_CONST_ANY:
+ case SYMBOL_REF:
+ case CODE_LABEL:
+ case PC:
+ case CC0:
+ case EXPR_LIST:
+ return false;
+ default:
+ break;
+ }
+
+ format_ptr = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ {
+ switch (*format_ptr++)
+ {
+ case 'e':
+ if (exact_dependency_1 (addr, XEXP (insn, i)))
+ return true;
+ break;
+ case 'E':
+ for (j = 0; j < XVECLEN (insn, i); j++)
+ if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
+ return true;
+ break;
+ }
+ }
+ return false;
+}
+
+/* Return true if there is an exact dependency between STORE and LOAD,
+ i.e. both use the same memory address. */
+static bool
+exact_store_load_dependency (rtx store, rtx load)
+{
+ rtx set1, set2;
+
+ set1 = single_set (store);
+ if (!set1)
+ return false;
+ if (!MEM_P (SET_DEST (set1)))
+ return false;
+ set2 = single_set (load);
+ if (!set2)
+ return false;
+ if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
+ return true;
+ return false;
+}
+
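Concretely, the kind of instruction pair the new predicate matches (the assembly is my illustration; the motivation, judging from the cost adjustment below, is presumably a store-forwarding stall on Silvermont when a narrow store feeds a subsequent load):

    /* Illustrative pair with an "exact" dependency: the load reads
       exactly the address the earlier insn stores to.
         insn A:  mov %ax, 8(%rsp)      ; HImode store
         insn B:  mov 8(%rsp), %ebx     ; load from the same slot  */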
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
@@ -24519,6 +24606,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
else
cost = 0;
}
+ break;
+
+ case PROCESSOR_SLM:
+ if (!reload_completed)
+ return cost;
+
+ /* Increase cost of integer loads. */
+ memory = get_attr_memory (dep_insn);
+ if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ {
+ enum attr_unit unit = get_attr_unit (dep_insn);
+ if (unit == UNIT_INTEGER && cost == 1)
+ {
+ if (memory == MEMORY_LOAD)
+ cost = 3;
+ else
+ {
+ /* Increase the ld/st cost for short integer types only,
+ because of store-forwarding issues. */
+ rtx set = single_set (dep_insn);
+ if (set && (GET_MODE (SET_DEST (set)) == QImode
+ || GET_MODE (SET_DEST (set)) == HImode))
+ {
+ /* Increase the cost only if an exact store-to-load
+ dependence exists and the consumer is a load insn. */
+ enum attr_memory insn_memory = get_attr_memory (insn);
+ if (insn_memory == MEMORY_LOAD
+ && exact_store_load_dependency (dep_insn, insn))
+ cost = 3;
+ }
+ }
+ }
+ }
default:
break;
@@ -24565,110 +24685,204 @@ ia32_multipass_dfa_lookahead (void)
execution. It is applied if
(1) IMUL instruction is on the top of list;
(2) There exists the only producer of independent IMUL instruction in
- ready list;
- (3) Put found producer on the top of ready list.
- Returns issue rate. */
-
+ ready list.
+ Return the index of the IMUL producer if found, and -1 otherwise. */
static int
-ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
- int clock_var ATTRIBUTE_UNUSED)
+do_reorder_for_imul (rtx *ready, int n_ready)
{
- static int issue_rate = -1;
- int n_ready = *pn_ready;
- rtx insn, insn1, insn2;
- int i;
+ rtx insn, set, insn1, insn2;
sd_iterator_def sd_it;
dep_t dep;
int index = -1;
+ int i;
- /* Set up issue rate. */
- issue_rate = ix86_issue_rate();
-
- /* Do reodering for Atom only. */
if (ix86_tune != PROCESSOR_ATOM)
- return issue_rate;
- /* Do not perform ready list reodering for pre-reload schedule pass. */
- if (!reload_completed)
- return issue_rate;
- /* Nothing to do if ready list contains only 1 instruction. */
- if (n_ready <= 1)
- return issue_rate;
+ return index;
/* Check that IMUL instruction is on the top of ready list. */
insn = ready[n_ready - 1];
- if (!NONDEBUG_INSN_P (insn))
- return issue_rate;
- insn = PATTERN (insn);
- if (GET_CODE (insn) == PARALLEL)
- insn = XVECEXP (insn, 0, 0);
- if (GET_CODE (insn) != SET)
- return issue_rate;
- if (!(GET_CODE (SET_SRC (insn)) == MULT
- && GET_MODE (SET_SRC (insn)) == SImode))
- return issue_rate;
+ set = single_set (insn);
+ if (!set)
+ return index;
+ if (!(GET_CODE (SET_SRC (set)) == MULT
+ && GET_MODE (SET_SRC (set)) == SImode))
+ return index;
/* Search for producer of independent IMUL instruction. */
- for (i = n_ready - 2; i>= 0; i--)
+ for (i = n_ready - 2; i >= 0; i--)
{
insn = ready[i];
if (!NONDEBUG_INSN_P (insn))
- continue;
+ continue;
/* Skip IMUL instruction. */
insn2 = PATTERN (insn);
if (GET_CODE (insn2) == PARALLEL)
- insn2 = XVECEXP (insn2, 0, 0);
+ insn2 = XVECEXP (insn2, 0, 0);
if (GET_CODE (insn2) == SET
- && GET_CODE (SET_SRC (insn2)) == MULT
- && GET_MODE (SET_SRC (insn2)) == SImode)
- continue;
+ && GET_CODE (SET_SRC (insn2)) == MULT
+ && GET_MODE (SET_SRC (insn2)) == SImode)
+ continue;
FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
- {
- rtx con;
+ {
+ rtx con;
con = DEP_CON (dep);
if (!NONDEBUG_INSN_P (con))
continue;
- insn1 = PATTERN (con);
- if (GET_CODE (insn1) == PARALLEL)
- insn1 = XVECEXP (insn1, 0, 0);
-
- if (GET_CODE (insn1) == SET
- && GET_CODE (SET_SRC (insn1)) == MULT
- && GET_MODE (SET_SRC (insn1)) == SImode)
- {
- sd_iterator_def sd_it1;
- dep_t dep1;
- /* Check if there is no other dependee for IMUL. */
- index = i;
- FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
- {
- rtx pro;
- pro = DEP_PRO (dep1);
+ insn1 = PATTERN (con);
+ if (GET_CODE (insn1) == PARALLEL)
+ insn1 = XVECEXP (insn1, 0, 0);
+
+ if (GET_CODE (insn1) == SET
+ && GET_CODE (SET_SRC (insn1)) == MULT
+ && GET_MODE (SET_SRC (insn1)) == SImode)
+ {
+ sd_iterator_def sd_it1;
+ dep_t dep1;
+ /* Check if there is no other dependee for IMUL. */
+ index = i;
+ FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep1);
if (!NONDEBUG_INSN_P (pro))
continue;
- if (pro != insn)
- index = -1;
- }
- if (index >= 0)
- break;
- }
- }
+ if (pro != insn)
+ index = -1;
+ }
+ if (index >= 0)
+ break;
+ }
+ }
if (index >= 0)
- break;
+ break;
}
- if (index < 0)
- return issue_rate; /* Didn't find IMUL producer. */
+ return index;
+}
- if (sched_verbose > 1)
- fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
- INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
+/* Try to find the better candidate for the top of the ready list when
+ the top two insns have the same priority - a candidate is better if
+ its dependees were scheduled earlier. Applied to Silvermont only.
+ Return true if the top two insns must be interchanged. */
+static bool
+swap_top_of_ready_list (rtx *ready, int n_ready)
+{
+ rtx top = ready[n_ready - 1];
+ rtx next = ready[n_ready - 2];
+ rtx set;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int clock1 = -1;
+ int clock2 = -1;
+ #define INSN_TICK(INSN) (HID (INSN)->tick)
- /* Put IMUL producer (ready[index]) at the top of ready list. */
- insn1= ready[index];
- for (i = index; i < n_ready - 1; i++)
- ready[i] = ready[i + 1];
- ready[n_ready - 1] = insn1;
+ if (ix86_tune != PROCESSOR_SLM)
+ return false;
+
+ if (!NONDEBUG_INSN_P (top))
+ return false;
+ if (!NONJUMP_INSN_P (top))
+ return false;
+ if (!NONDEBUG_INSN_P (next))
+ return false;
+ if (!NONJUMP_INSN_P (next))
+ return false;
+ set = single_set (top);
+ if (!set)
+ return false;
+ set = single_set (next);
+ if (!set)
+ return false;
+ if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
+ {
+ if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
+ return false;
+ /* Determine the winner more precisely. */
+ FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock1)
+ clock1 = INSN_TICK (pro);
+ }
+ FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock2)
+ clock2 = INSN_TICK (pro);
+ }
+
+ if (clock1 == clock2)
+ {
+ /* Determine the winner - a load must win. */
+ enum attr_memory memory1, memory2;
+ memory1 = get_attr_memory (top);
+ memory2 = get_attr_memory (next);
+ if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
+ return true;
+ }
+ return (bool) (clock2 < clock1);
+ }
+ return false;
+ #undef INSN_TICK
+}
+
+/* Perform possible reordering of the ready list, for Atom/Silvermont
+ only. Return the issue rate. */
+static int
+ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+ int clock_var)
+{
+ int issue_rate = -1;
+ int n_ready = *pn_ready;
+ int i;
+ rtx insn;
+ int index = -1;
+
+ /* Set up issue rate. */
+ issue_rate = ix86_issue_rate ();
+
+ /* Do reordering for Atom/SLM only. */
+ if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
+ return issue_rate;
+
+ /* Nothing to do if ready list contains only 1 instruction. */
+ if (n_ready <= 1)
+ return issue_rate;
+
+ /* Do reordering for the post-reload scheduler only. */
+ if (!reload_completed)
+ return issue_rate;
+
+ if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
+ INSN_UID (ready[index]));
+
+ /* Put IMUL producer (ready[index]) at the top of ready list. */
+ insn = ready[index];
+ for (i = index; i < n_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[n_ready - 1] = insn;
+ return issue_rate;
+ }
+ if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
+ INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
+ /* Swap 2 top elements of ready list. */
+ insn = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = insn;
+ }
return issue_rate;
}
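Taken together, the helpers implement a small post-reload shuffle of the ready list. A hypothetical before/after (my illustration of the heuristic, not stated in the patch):

    /* Ready list with the top element rightmost:
         before:  { ..., ADD r1=r2+r3, IMUL#1 }    IMUL#1 issues next,
       where the ADD is the sole producer feeding an independent
       IMUL#2 elsewhere in the list.  After do_reorder_for_imul:
         after:   { ..., IMUL#1, ADD r1=r2+r3 }    ADD issues next,
       so IMUL#2 becomes ready sooner and the two multiplies can
       overlap in the pipeline.  */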
@@ -25161,11 +25375,12 @@ ix86_constant_alignment (tree exp, int align)
instead of that alignment to align the object. */
int
-ix86_data_alignment (tree type, int align)
+ix86_data_alignment (tree type, int align, bool opt)
{
int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
- if (AGGREGATE_TYPE_P (type)
+ if (opt
+ && AGGREGATE_TYPE_P (type)
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
@@ -25177,14 +25392,17 @@ ix86_data_alignment (tree type, int align)
to 16byte boundary. */
if (TARGET_64BIT)
{
- if (AGGREGATE_TYPE_P (type)
- && TYPE_SIZE (type)
- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
- || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
return 128;
}
+ if (!opt)
+ return align;
+
if (TREE_CODE (type) == ARRAY_TYPE)
{
if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
@@ -29834,11 +30052,11 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_AMD,
M_CPU_TYPE_START,
M_INTEL_ATOM,
- M_INTEL_SLM,
M_INTEL_CORE2,
M_INTEL_COREI7,
M_AMDFAM10H,
M_AMDFAM15H,
+ M_INTEL_SLM,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
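Moving M_INTEL_SLM behind the existing entries keeps the numeric values of the earlier M_* constants unchanged; these values are shared with libgcc's __cpu_model and so are effectively ABI (my inference). User code reaches them through __builtin_cpu_is, e.g. (sketch; "slm" is the string assumed to map to M_INTEL_SLM):

    /* Sketch: run-time selection on the CPU type.  */
    int
    pick_path (void)
    {
      if (__builtin_cpu_is ("slm"))
        return 1;   /* Silvermont-tuned variant.  */
      return 0;     /* Generic variant.  */
    }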
@@ -33737,6 +33955,8 @@ static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
enum machine_mode mode, int strict)
{
+ if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
+ return false;
if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
|| MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
|| MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 776582a66de..7d940f98804 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -333,6 +333,7 @@ enum ix86_tune_indices {
X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
+ X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
X86_TUNE_LAST
};
@@ -443,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
+#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
+ ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -856,7 +859,18 @@ enum target_cpu_default
cause character arrays to be word-aligned so that `strcpy' calls
that copy constants to character arrays can be done inline. */
-#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ix86_data_alignment ((TYPE), (ALIGN), true)
+
+/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates
+ some alignment increase, instead of optimization only purposes. E.g.
+ AMD x86-64 psABI says that variables with array type larger than 15 bytes
+ must be aligned to 16 byte boundaries.
+
+ If this macro is not defined, then ALIGN is used. */
+
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
+ ix86_data_alignment ((TYPE), (ALIGN), false)
/* If defined, a C expression to compute the alignment for a local
variable. TYPE is the data type, and ALIGN is the alignment that
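With opt false, only the ABI-mandated increase survives; the optimization-only increases stay behind DATA_ALIGNMENT. Illustrative declarations under the psABI rule quoted above (my example):

    char small_buf[15];   /* no mandated increase; ABI alignment 1.  */
    char big_buf[16];     /* >15 bytes: must be 16-byte aligned.     */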
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 28b0c78093a..e97a4570501 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -3625,6 +3625,18 @@
CONST0_RTX (V4SFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (float_extend:DF
+ (match_operand:SF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
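Illustrative effect of the peephole (the assembly is my sketch, not from the patch); the DF-to-SF peephole added further down is the mirror image:

    ;;   cvtss2sd (%rax), %xmm0          ; convert with memory operand
    ;; becomes
    ;;   movss    (%rax), %xmm0          ; separate load
    ;;   cvtss2sd %xmm0, %xmm0           ; convert within one register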
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF
@@ -3766,6 +3778,18 @@
CONST0_RTX (V2DFmode), operands[1]));
})
+;; It's more profitable to split and then truncate in the same register.
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1)))
@@ -16567,6 +16591,7 @@
"(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
&& peep2_reg_dead_p (4, operands[0])
&& !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
&& (<MODE>mode != QImode
|| immediate_operand (operands[2], QImode)
|| q_regs_operand (operands[2], QImode))
@@ -16631,6 +16656,7 @@
|| immediate_operand (operands[2], SImode)
|| q_regs_operand (operands[2], SImode))
&& !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
&& ix86_match_ccmode (peep2_next_insn (3),
(GET_CODE (operands[3]) == PLUS
|| GET_CODE (operands[3]) == MINUS)
diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def
index f08fad6778d..d7db0ba96d6 100644
--- a/gcc/config/mips/mips-cpus.def
+++ b/gcc/config/mips/mips-cpus.def
@@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0)
MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0)
MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0)
MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0)
+MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0)
/* ST Loongson 2E/2F processors. */
MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY)
diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md
index 002c9992001..49a08689638 100644
--- a/gcc/config/mips/mips-dsp.md
+++ b/gcc/config/mips/mips-dsp.md
@@ -1131,8 +1131,7 @@
"ISA_HAS_L<SHORT:SIZE><U>X"
"l<SHORT:size><u>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "<GPR:MODE>")
- (set_attr "length" "4")])
+ (set_attr "mode" "<GPR:MODE>")])
(define_expand "mips_lhx"
[(match_operand:SI 0 "register_operand")
@@ -1165,8 +1164,7 @@
"ISA_HAS_L<GPR:SIZE>X"
"l<GPR:size>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "<GPR:MODE>")
- (set_attr "length" "4")])
+ (set_attr "mode" "<GPR:MODE>")])
(define_insn "*mips_lw<u>x_<P:mode>_ext"
[(set (match_operand:DI 0 "register_operand" "=d")
@@ -1176,8 +1174,7 @@
"ISA_HAS_LW<U>X && TARGET_64BIT"
"lw<u>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "DI")
- (set_attr "length" "4")])
+ (set_attr "mode" "DI")])
;; Table 2-8. MIPS DSP ASE Instructions: Branch
;; BPOSGE32
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 9c70cc4324f..a22c7829b77 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -481,7 +481,7 @@
operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
}
[(set_attr "type" "fcmp")
- (set_attr "length" "8")
+ (set_attr "insn_count" "2")
(set_attr "mode" "FPSW")])
(define_insn_and_split "mips_cabs_cond_4s"
@@ -510,7 +510,7 @@
operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
}
[(set_attr "type" "fcmp")
- (set_attr "length" "8")
+ (set_attr "insn_count" "2")
(set_attr "mode" "FPSW")])
diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt
index 0d7fa26510d..8ed412cc15e 100644
--- a/gcc/config/mips/mips-tables.opt
+++ b/gcc/config/mips/mips-tables.opt
@@ -208,425 +208,431 @@ EnumValue
Enum(mips_arch_opt_value) String(4700) Value(22)
EnumValue
-Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical
+Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical
+Enum(mips_arch_opt_value) String(5900) Value(23)
EnumValue
-Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical
+Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r8k) Value(25)
+Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(8000) Value(25)
+Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(8k) Value(25)
+Enum(mips_arch_opt_value) String(r8k) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical
+Enum(mips_arch_opt_value) String(8000) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(r10k) Value(26)
+Enum(mips_arch_opt_value) String(8k) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(10000) Value(26)
+Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(10k) Value(26)
+Enum(mips_arch_opt_value) String(r10k) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical
+Enum(mips_arch_opt_value) String(10000) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(r12k) Value(27)
+Enum(mips_arch_opt_value) String(10k) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(12000) Value(27)
+Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(12k) Value(27)
+Enum(mips_arch_opt_value) String(r12k) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical
+Enum(mips_arch_opt_value) String(12000) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(r14k) Value(28)
+Enum(mips_arch_opt_value) String(12k) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(14000) Value(28)
+Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(14k) Value(28)
+Enum(mips_arch_opt_value) String(r14k) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical
+Enum(mips_arch_opt_value) String(14000) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(r16k) Value(29)
+Enum(mips_arch_opt_value) String(14k) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(16000) Value(29)
+Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(16k) Value(29)
+Enum(mips_arch_opt_value) String(r16k) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical
+Enum(mips_arch_opt_value) String(16000) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(vr5k) Value(30)
+Enum(mips_arch_opt_value) String(16k) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(5000) Value(30)
+Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(5k) Value(30)
+Enum(mips_arch_opt_value) String(vr5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5000) Value(30)
+Enum(mips_arch_opt_value) String(5000) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5k) Value(30)
+Enum(mips_arch_opt_value) String(5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical
+Enum(mips_arch_opt_value) String(r5000) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(5400) Value(31)
+Enum(mips_arch_opt_value) String(r5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5400) Value(31)
+Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical
+Enum(mips_arch_opt_value) String(5400) Value(32)
EnumValue
-Enum(mips_arch_opt_value) String(5500) Value(32)
+Enum(mips_arch_opt_value) String(r5400) Value(32)
EnumValue
-Enum(mips_arch_opt_value) String(r5500) Value(32)
+Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical
+Enum(mips_arch_opt_value) String(5500) Value(33)
EnumValue
-Enum(mips_arch_opt_value) String(rm7k) Value(33)
+Enum(mips_arch_opt_value) String(r5500) Value(33)
EnumValue
-Enum(mips_arch_opt_value) String(7000) Value(33)
+Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(7k) Value(33)
+Enum(mips_arch_opt_value) String(rm7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(r7000) Value(33)
+Enum(mips_arch_opt_value) String(7000) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(r7k) Value(33)
+Enum(mips_arch_opt_value) String(7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical
+Enum(mips_arch_opt_value) String(r7000) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(rm9k) Value(34)
+Enum(mips_arch_opt_value) String(r7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(9000) Value(34)
+Enum(mips_arch_opt_value) String(rm9000) Value(35) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(9k) Value(34)
+Enum(mips_arch_opt_value) String(rm9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r9000) Value(34)
+Enum(mips_arch_opt_value) String(9000) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r9k) Value(34)
+Enum(mips_arch_opt_value) String(9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical
+Enum(mips_arch_opt_value) String(r9000) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r4kc) Value(35)
+Enum(mips_arch_opt_value) String(r9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(4km) Value(36) Canonical
+Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4km) Value(36)
+Enum(mips_arch_opt_value) String(r4kc) Value(36)
EnumValue
-Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical
+Enum(mips_arch_opt_value) String(4km) Value(37) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kp) Value(37)
+Enum(mips_arch_opt_value) String(r4km) Value(37)
EnumValue
-Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical
+Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4ksc) Value(38)
+Enum(mips_arch_opt_value) String(r4kp) Value(38)
EnumValue
-Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical
+Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical
+Enum(mips_arch_opt_value) String(r4ksc) Value(39)
EnumValue
-Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical
+Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical
+Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kec) Value(42)
+Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical
+Enum(mips_arch_opt_value) String(4kec) Value(43) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kem) Value(43)
+Enum(mips_arch_opt_value) String(r4kec) Value(43)
EnumValue
-Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical
+Enum(mips_arch_opt_value) String(4kem) Value(44) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kep) Value(44)
+Enum(mips_arch_opt_value) String(r4kem) Value(44)
EnumValue
-Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical
+Enum(mips_arch_opt_value) String(4kep) Value(45) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4ksd) Value(45)
+Enum(mips_arch_opt_value) String(r4kep) Value(45)
EnumValue
-Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical
+Enum(mips_arch_opt_value) String(4ksd) Value(46) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kc) Value(46)
+Enum(mips_arch_opt_value) String(r4ksd) Value(46)
EnumValue
-Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical
+Enum(mips_arch_opt_value) String(24kc) Value(47) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf2_1) Value(47)
+Enum(mips_arch_opt_value) String(r24kc) Value(47)
EnumValue
-Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical
+Enum(mips_arch_opt_value) String(24kf2_1) Value(48) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf) Value(48)
+Enum(mips_arch_opt_value) String(r24kf2_1) Value(48)
EnumValue
-Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical
+Enum(mips_arch_opt_value) String(24kf) Value(49) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf1_1) Value(49)
+Enum(mips_arch_opt_value) String(r24kf) Value(49)
EnumValue
-Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical
+Enum(mips_arch_opt_value) String(24kf1_1) Value(50) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kfx) Value(50)
+Enum(mips_arch_opt_value) String(r24kf1_1) Value(50)
EnumValue
-Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical
+Enum(mips_arch_opt_value) String(24kfx) Value(51) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kx) Value(51)
+Enum(mips_arch_opt_value) String(r24kfx) Value(51)
EnumValue
-Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical
+Enum(mips_arch_opt_value) String(24kx) Value(52) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kec) Value(52)
+Enum(mips_arch_opt_value) String(r24kx) Value(52)
EnumValue
-Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical
+Enum(mips_arch_opt_value) String(24kec) Value(53) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef2_1) Value(53)
+Enum(mips_arch_opt_value) String(r24kec) Value(53)
EnumValue
-Enum(mips_arch_opt_value) String(24kef) Value(54) Canonical
+Enum(mips_arch_opt_value) String(24kef2_1) Value(54) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef) Value(54)
+Enum(mips_arch_opt_value) String(r24kef2_1) Value(54)
EnumValue
-Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical
+Enum(mips_arch_opt_value) String(24kef) Value(55) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef1_1) Value(55)
+Enum(mips_arch_opt_value) String(r24kef) Value(55)
EnumValue
-Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical
+Enum(mips_arch_opt_value) String(24kef1_1) Value(56) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kefx) Value(56)
+Enum(mips_arch_opt_value) String(r24kef1_1) Value(56)
EnumValue
-Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical
+Enum(mips_arch_opt_value) String(24kefx) Value(57) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kex) Value(57)
+Enum(mips_arch_opt_value) String(r24kefx) Value(57)
EnumValue
-Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical
+Enum(mips_arch_opt_value) String(24kex) Value(58) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kc) Value(58)
+Enum(mips_arch_opt_value) String(r24kex) Value(58)
EnumValue
-Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical
+Enum(mips_arch_opt_value) String(34kc) Value(59) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf2_1) Value(59)
+Enum(mips_arch_opt_value) String(r34kc) Value(59)
EnumValue
-Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical
+Enum(mips_arch_opt_value) String(34kf2_1) Value(60) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf) Value(60)
+Enum(mips_arch_opt_value) String(r34kf2_1) Value(60)
EnumValue
-Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical
+Enum(mips_arch_opt_value) String(34kf) Value(61) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf1_1) Value(61)
+Enum(mips_arch_opt_value) String(r34kf) Value(61)
EnumValue
-Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical
+Enum(mips_arch_opt_value) String(34kf1_1) Value(62) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kfx) Value(62)
+Enum(mips_arch_opt_value) String(r34kf1_1) Value(62)
EnumValue
-Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical
+Enum(mips_arch_opt_value) String(34kfx) Value(63) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kx) Value(63)
+Enum(mips_arch_opt_value) String(r34kfx) Value(63)
EnumValue
-Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical
+Enum(mips_arch_opt_value) String(34kx) Value(64) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kn) Value(64)
+Enum(mips_arch_opt_value) String(r34kx) Value(64)
EnumValue
-Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical
+Enum(mips_arch_opt_value) String(34kn) Value(65) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kc) Value(65)
+Enum(mips_arch_opt_value) String(r34kn) Value(65)
EnumValue
-Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical
+Enum(mips_arch_opt_value) String(74kc) Value(66) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf2_1) Value(66)
+Enum(mips_arch_opt_value) String(r74kc) Value(66)
EnumValue
-Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical
+Enum(mips_arch_opt_value) String(74kf2_1) Value(67) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf) Value(67)
+Enum(mips_arch_opt_value) String(r74kf2_1) Value(67)
EnumValue
-Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical
+Enum(mips_arch_opt_value) String(74kf) Value(68) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf1_1) Value(68)
+Enum(mips_arch_opt_value) String(r74kf) Value(68)
EnumValue
-Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical
+Enum(mips_arch_opt_value) String(74kf1_1) Value(69) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kfx) Value(69)
+Enum(mips_arch_opt_value) String(r74kf1_1) Value(69)
EnumValue
-Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical
+Enum(mips_arch_opt_value) String(74kfx) Value(70) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kx) Value(70)
+Enum(mips_arch_opt_value) String(r74kfx) Value(70)
EnumValue
-Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical
+Enum(mips_arch_opt_value) String(74kx) Value(71) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf3_2) Value(71)
+Enum(mips_arch_opt_value) String(r74kx) Value(71)
EnumValue
-Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical
+Enum(mips_arch_opt_value) String(74kf3_2) Value(72) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kc) Value(72)
+Enum(mips_arch_opt_value) String(r74kf3_2) Value(72)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical
+Enum(mips_arch_opt_value) String(1004kc) Value(73) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73)
+Enum(mips_arch_opt_value) String(r1004kc) Value(73)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical
+Enum(mips_arch_opt_value) String(1004kf2_1) Value(74) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf) Value(74)
+Enum(mips_arch_opt_value) String(r1004kf2_1) Value(74)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical
+Enum(mips_arch_opt_value) String(1004kf) Value(75) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75)
+Enum(mips_arch_opt_value) String(r1004kf) Value(75)
EnumValue
-Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical
+Enum(mips_arch_opt_value) String(1004kf1_1) Value(76) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r5kc) Value(76)
+Enum(mips_arch_opt_value) String(r1004kf1_1) Value(76)
EnumValue
-Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical
+Enum(mips_arch_opt_value) String(5kc) Value(77) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r5kf) Value(77)
+Enum(mips_arch_opt_value) String(r5kc) Value(77)
EnumValue
-Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical
+Enum(mips_arch_opt_value) String(5kf) Value(78) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r20kc) Value(78)
+Enum(mips_arch_opt_value) String(r5kf) Value(78)
EnumValue
-Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical
+Enum(mips_arch_opt_value) String(20kc) Value(79) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical
+Enum(mips_arch_opt_value) String(r20kc) Value(79)
EnumValue
-Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical
+Enum(mips_arch_opt_value) String(sb1) Value(80) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(sr71k) Value(81)
+Enum(mips_arch_opt_value) String(sb1a) Value(81) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical
+Enum(mips_arch_opt_value) String(sr71000) Value(82) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical
+Enum(mips_arch_opt_value) String(sr71k) Value(82)
EnumValue
-Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical
+Enum(mips_arch_opt_value) String(xlr) Value(83) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(octeon+) Value(85) Canonical
+Enum(mips_arch_opt_value) String(loongson3a) Value(84) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical
+Enum(mips_arch_opt_value) String(octeon) Value(85) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical
+Enum(mips_arch_opt_value) String(octeon+) Value(86) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(octeon2) Value(87) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(xlp) Value(88) Canonical
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 222c768b9b9..bd1d10b0e4e 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data
1, /* branch_cost */
4 /* memory_latency */
},
+ { /* R5900 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (256), /* fp_mult_df */
+ COSTS_N_INSNS (8), /* fp_div_sf */
+ COSTS_N_INSNS (256), /* fp_div_df */
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (256), /* int_mult_di */
+ COSTS_N_INSNS (37), /* int_div_si */
+ COSTS_N_INSNS (256), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
{ /* R7000 */
/* The only costs that are changed here are
integer multiplication. */
@@ -12450,7 +12463,10 @@ mips_start_ll_sc_sync_block (void)
if (!ISA_HAS_LL_SC)
{
output_asm_insn (".set\tpush", 0);
- output_asm_insn (".set\tmips2", 0);
+ if (TARGET_64BIT)
+ output_asm_insn (".set\tmips3", 0);
+ else
+ output_asm_insn (".set\tmips2", 0);
}
}
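Illustrative emitted block for a 64-bit target whose selected ISA lacks LL/SC (the assembly is my sketch): LLD and SCD first appear in MIPS III, so ".set mips2" would reject the 64-bit variants:

    .set  push
    .set  mips3          # previously .set mips2
    lld   $8,0($4)
    ...
    scd   $8,0($4)
    .set  pop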
@@ -13005,6 +13021,7 @@ mips_issue_rate (void)
case PROCESSOR_R4130:
case PROCESSOR_R5400:
case PROCESSOR_R5500:
+ case PROCESSOR_R5900:
case PROCESSOR_R7000:
case PROCESSOR_R9000:
case PROCESSOR_OCTEON:
@@ -16025,8 +16042,9 @@ mips_reorg_process_insns (void)
cfun->machine->all_noreorder_p = false;
/* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder
- because we rely on the assembler to work around some errata. */
- if (TARGET_FIX_VR4120 || TARGET_FIX_24K)
+ because we rely on the assembler to work around some errata.
+ The R5900 likewise has several errata. */
+ if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900)
cfun->machine->all_noreorder_p = false;
/* The same is true for -mfix-vr4130 if we might generate MFLO or
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index a8cf1dbedc7..ff631c1a30b 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -222,6 +222,7 @@ struct mips_cpu_info {
#define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130)
#define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400)
#define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500)
+#define TARGET_MIPS5900 (mips_arch == PROCESSOR_R5900)
#define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000)
#define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000)
#define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON \
@@ -399,6 +400,9 @@ struct mips_cpu_info {
if (TARGET_MCU) \
builtin_define ("__mips_mcu"); \
\
+ if (TARGET_EVA) \
+ builtin_define ("__mips_eva"); \
+ \
if (TARGET_DSP) \
{ \
builtin_define ("__mips_dsp"); \
@@ -803,6 +807,7 @@ struct mips_cpu_info {
#define ISA_HAS_MUL3 ((TARGET_MIPS3900 \
|| TARGET_MIPS5400 \
|| TARGET_MIPS5500 \
+ || TARGET_MIPS5900 \
|| TARGET_MIPS7000 \
|| TARGET_MIPS9000 \
|| TARGET_MAD \
@@ -817,6 +822,22 @@ struct mips_cpu_info {
&& TARGET_OCTEON \
&& !TARGET_MIPS16)
+/* ISA supports instructions DMULT and DMULTU. */
+#define ISA_HAS_DMULT (TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions MULT and MULTU.
+ This is always true, but the macro is needed for ISA_HAS_<D>MULT
+ in mips.md. */
+#define ISA_HAS_MULT (1)
+
+/* ISA supports instructions DDIV and DDIVU. */
+#define ISA_HAS_DDIV (TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions DIV and DIVU.
+ This is always true, but the macro is needed for ISA_HAS_<D>DIV
+ in mips.md. */
+#define ISA_HAS_DIV (1)
+
#define ISA_HAS_DIV3 ((TARGET_LOONGSON_2EF \
|| TARGET_LOONGSON_3A) \
&& !TARGET_MIPS16)
@@ -833,7 +854,9 @@ struct mips_cpu_info {
/* ISA has the integer conditional move instructions introduced in mips4 and
ST Loongson 2E/2F. */
-#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF)
+#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE \
+ || TARGET_MIPS5900 \
+ || TARGET_LOONGSON_2EF)
/* ISA has LDC1 and SDC1. */
#define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16)
@@ -946,6 +969,7 @@ struct mips_cpu_info {
/* ISA has data prefetch instructions. This controls use of 'pref'. */
#define ISA_HAS_PREFETCH ((ISA_MIPS4 \
|| TARGET_LOONGSON_2EF \
+ || TARGET_MIPS5900 \
|| ISA_MIPS32 \
|| ISA_MIPS32R2 \
|| ISA_MIPS64 \
@@ -1007,15 +1031,18 @@ struct mips_cpu_info {
and "addiu $4,$4,1". */
#define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \
&& !TARGET_MIPS3900 \
+ && !TARGET_MIPS5900 \
&& !TARGET_MIPS16 \
&& !TARGET_MICROMIPS)
/* Likewise mtc1 and mfc1. */
#define ISA_HAS_XFER_DELAY (mips_isa <= 3 \
+ && !TARGET_MIPS5900 \
&& !TARGET_LOONGSON_2EF)
/* Likewise floating-point comparisons. */
#define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \
+ && !TARGET_MIPS5900 \
&& !TARGET_LOONGSON_2EF)
/* True if mflo and mfhi can be immediately followed by instructions
@@ -1035,6 +1062,7 @@ struct mips_cpu_info {
|| ISA_MIPS64 \
|| ISA_MIPS64R2 \
|| TARGET_MIPS5500 \
+ || TARGET_MIPS5900 \
|| TARGET_LOONGSON_2EF)
/* ISA includes synci, jr.hb and jalr.hb. */
@@ -1052,7 +1080,7 @@ struct mips_cpu_info {
/* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC
because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC
instructions. */
-#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16)
+#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16)
#define GENERATE_LL_SC \
(target_flags_explicit & MASK_LLSC \
? TARGET_LLSC && !TARGET_MIPS16 \
@@ -1125,6 +1153,7 @@ struct mips_cpu_info {
%{mdsp} %{mno-dsp} \
%{mdspr2} %{mno-dspr2} \
%{mmcu} %{mno-mcu} \
+%{meva} %{mno-eva} \
%{msmartmips} %{mno-smartmips} \
%{mmt} %{mno-mt} \
%{mfix-vr4120} %{mfix-vr4130} \
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 6f6484b0d8c..ce322d8bc36 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -55,6 +55,7 @@
r5000
r5400
r5500
+ r5900
r7000
r8000
r9000
@@ -406,8 +407,12 @@
;; Is this an extended instruction in mips16 mode?
(define_attr "extended_mips16" "no,yes"
- (if_then_else (ior (eq_attr "move_type" "sll0")
- (eq_attr "jal" "direct"))
+ (if_then_else (ior ;; In general, constant-pool loads are extended
+ ;; instructions. We don't yet optimize for 16-bit
+ ;; PC-relative references.
+ (eq_attr "move_type" "sll0,loadpool")
+ (eq_attr "jal" "direct")
+ (eq_attr "got" "load"))
(const_string "yes")
(const_string "no")))
@@ -420,14 +425,89 @@
(match_test "TARGET_MICROMIPS")))
(const_string "yes")
(const_string "no")))
-
-;; Length of instruction in bytes.
-(define_attr "length" ""
- (cond [(and (eq_attr "extended_mips16" "yes")
- (match_test "TARGET_MIPS16"))
- (const_int 4)
- (and (eq_attr "compression" "micromips,all")
+;; The number of individual instructions that a non-branch pattern generates,
+;; using units of BASE_INSN_LENGTH.
+(define_attr "insn_count" ""
+ (cond [;; "Ghost" instructions occupy no space.
+ (eq_attr "type" "ghost")
+ (const_int 0)
+
+ ;; Extended instructions count as 2.
+ (and (eq_attr "extended_mips16" "yes")
+ (match_test "TARGET_MIPS16"))
+ (const_int 2)
+
+ ;; A GOT load followed by an add of $gp. This is not used for MIPS16.
+ (eq_attr "got" "xgot_high")
+ (const_int 2)
+
+ ;; SHIFT_SHIFTs are decomposed into two separate instructions.
+ ;; They are extended instructions on MIPS16 targets.
+ (eq_attr "move_type" "shift_shift")
+ (if_then_else (match_test "TARGET_MIPS16")
+ (const_int 4)
+ (const_int 2))
+
+ ;; Check for doubleword moves that are decomposed into two
+ ;; instructions. The individual instructions are unextended
+ ;; MIPS16 ones.
+ (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
+ (eq_attr "dword_mode" "yes"))
+ (const_int 2)
+
+ ;; Constants, loads and stores are handled by external routines.
+ (and (eq_attr "move_type" "const,constN")
+ (eq_attr "dword_mode" "yes"))
+ (symbol_ref "mips_split_const_insns (operands[1])")
+ (eq_attr "move_type" "const,constN")
+ (symbol_ref "mips_const_insns (operands[1])")
+ (eq_attr "move_type" "load,fpload")
+ (symbol_ref "mips_load_store_insns (operands[1], insn)")
+ (eq_attr "move_type" "store,fpstore")
+ (symbol_ref "mips_load_store_insns (operands[0], insn)
+ + (TARGET_FIX_24K ? 1 : 0)")
+
+ ;; In the worst case, a call macro will take 8 instructions:
+ ;;
+ ;; lui $25,%call_hi(FOO)
+ ;; addu $25,$25,$28
+ ;; lw $25,%call_lo(FOO)($25)
+ ;; nop
+ ;; jalr $25
+ ;; nop
+ ;; lw $gp,X($sp)
+ ;; nop
+ (eq_attr "jal_macro" "yes")
+ (const_int 8)
+
+ ;; Various VR4120 errata require a nop to be inserted after a macc
+ ;; instruction. The assembler does this for us, so account for
+ ;; the worst-case length here.
+ (and (eq_attr "type" "imadd")
+ (match_test "TARGET_FIX_VR4120"))
+ (const_int 2)
+
+ ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
+ ;; the result of the second one is missed. The assembler should work
+ ;; around this by inserting a nop after the first dmult.
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")
+ (match_test "TARGET_FIX_VR4120"))
+ (const_int 2)
+
+ (eq_attr "type" "idiv,idiv3")
+ (symbol_ref "mips_idiv_insns ()")
+
+ (not (eq_attr "sync_mem" "none"))
+ (symbol_ref "mips_sync_loop_insns (insn, operands)")]
+ (const_int 1)))
+
+;; Length of instruction in bytes. The default is derived from "insn_count",
+;; but there are special cases for branches (which must be handled here)
+;; and for compressed single instructions.
+(define_attr "length" ""
+ (cond [(and (eq_attr "compression" "micromips,all")
(eq_attr "dword_mode" "no")
(match_test "TARGET_MICROMIPS"))
(const_int 2)
@@ -580,95 +660,8 @@
(const_int 20)
(match_test "Pmode == SImode")
(const_int 16)
- ] (const_int 24))
-
- ;; "Ghost" instructions occupy no space.
- (eq_attr "type" "ghost")
- (const_int 0)
-
- ;; GOT loads are extended MIPS16 instructions and 4-byte
- ;; microMIPS instructions.
- (eq_attr "got" "load")
- (const_int 4)
-
- ;; A GOT load followed by an add of $gp.
- (eq_attr "got" "xgot_high")
- (const_int 8)
-
- ;; In general, constant-pool loads are extended instructions.
- (eq_attr "move_type" "loadpool")
- (const_int 4)
-
- ;; SHIFT_SHIFTs are decomposed into two separate instructions.
- ;; They are extended instructions on MIPS16 targets.
- (eq_attr "move_type" "shift_shift")
- (const_int 8)
-
- ;; Check for doubleword moves that are decomposed into two
- ;; instructions. The individual instructions are unextended
- ;; MIPS16 ones or 2-byte microMIPS ones.
- (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
- (eq_attr "dword_mode" "yes"))
- (if_then_else (match_test "TARGET_COMPRESSION")
- (const_int 4)
- (const_int 8))
-
- ;; Doubleword CONST{,N} moves are split into two word
- ;; CONST{,N} moves.
- (and (eq_attr "move_type" "const,constN")
- (eq_attr "dword_mode" "yes"))
- (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH")
-
- ;; Otherwise, constants, loads and stores are handled by external
- ;; routines.
- (eq_attr "move_type" "const,constN")
- (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH")
- (eq_attr "move_type" "load,fpload")
- (symbol_ref "mips_load_store_insns (operands[1], insn)
- * BASE_INSN_LENGTH")
- (eq_attr "move_type" "store,fpstore")
- (symbol_ref "mips_load_store_insns (operands[0], insn)
- * BASE_INSN_LENGTH
- + (TARGET_FIX_24K ? NOP_INSN_LENGTH : 0)")
-
- ;; In the worst case, a call macro will take 8 instructions:
- ;;
- ;; lui $25,%call_hi(FOO)
- ;; addu $25,$25,$28
- ;; lw $25,%call_lo(FOO)($25)
- ;; nop
- ;; jalr $25
- ;; nop
- ;; lw $gp,X($sp)
- ;; nop
- (eq_attr "jal_macro" "yes")
- (const_int 32)
-
- ;; Various VR4120 errata require a nop to be inserted after a macc
- ;; instruction. The assembler does this for us, so account for
- ;; the worst-case length here.
- (and (eq_attr "type" "imadd")
- (match_test "TARGET_FIX_VR4120"))
- (const_int 8)
-
- ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
- ;; the result of the second one is missed. The assembler should work
- ;; around this by inserting a nop after the first dmult.
- (and (eq_attr "type" "imul,imul3")
- (and (eq_attr "mode" "DI")
- (match_test "TARGET_FIX_VR4120")))
- (const_int 8)
-
- (eq_attr "type" "idiv,idiv3")
- (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH")
-
- (not (eq_attr "sync_mem" "none"))
- (symbol_ref "mips_sync_loop_insns (insn, operands)
- * BASE_INSN_LENGTH")
-
- (match_test "TARGET_MIPS16")
- (const_int 2)
- ] (const_int 4)))
+ ] (const_int 24))]
+ (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH")))
;; Attribute describing the processor.
(define_enum_attr "cpu" "processor"
@@ -701,16 +694,11 @@
(const_string "hilo")]
(const_string "none")))
-;; Is it a single instruction?
-(define_attr "single_insn" "no,yes"
- (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4)
- ? SINGLE_INSN_YES : SINGLE_INSN_NO)"))
-
;; Can the instruction be put into a delay slot?
(define_attr "can_delay" "no,yes"
(if_then_else (and (eq_attr "type" "!branch,call,jump")
- (and (eq_attr "hazard" "none")
- (eq_attr "single_insn" "yes")))
+ (eq_attr "hazard" "none")
+ (match_test "get_attr_insn_count (insn) == 1"))
(const_string "yes")
(const_string "no")))
@@ -755,7 +743,9 @@
;; This mode iterator allows :MOVECC to be used anywhere that a
;; conditional-move-type condition is needed.
(define_mode_iterator MOVECC [SI (DI "TARGET_64BIT")
- (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")])
+ (CC "TARGET_HARD_FLOAT
+ && !TARGET_LOONGSON_2EF
+ && !TARGET_MIPS5900")])
;; 32-bit integer moves for which we provide move patterns.
(define_mode_iterator IMOVE32
@@ -1417,7 +1407,7 @@
"mul.<fmt>\t%0,%1,%2\;nop"
[(set_attr "type" "fmul")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_insn "mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=f")
@@ -1478,7 +1468,7 @@
[(set (match_operand:GPR 0 "register_operand")
(mult:GPR (match_operand:GPR 1 "register_operand")
(match_operand:GPR 2 "register_operand")))]
- ""
+ "ISA_HAS_<D>MULT"
{
rtx lo;
@@ -1524,7 +1514,7 @@
{
if (which_alternative == 1)
return "<d>mult\t%1,%2";
- if (<MODE>mode == SImode && TARGET_MIPS3900)
+ if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900))
return "mult\t%0,%1,%2";
return "<d>mul\t%0,%1,%2";
}
@@ -1558,7 +1548,7 @@
[(set (match_operand:GPR 0 "muldiv_target_operand" "=l")
(mult:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))]
- "!TARGET_FIX_R4000"
+ "ISA_HAS_<D>MULT && !TARGET_FIX_R4000"
"<d>mult\t%1,%2"
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")])
@@ -1568,11 +1558,11 @@
(mult:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))
(clobber (match_scratch:GPR 3 "=l"))]
- "TARGET_FIX_R4000"
+ "ISA_HAS_<D>MULT && TARGET_FIX_R4000"
"<d>mult\t%1,%2\;mflo\t%0"
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead
;; of "mult; mflo". They have the same latency, but the first form gives
@@ -1632,7 +1622,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "3")
(set_attr "mode" "SI")
- (set_attr "length" "4,8")])
+ (set_attr "insn_count" "1,2")])
;; The same idea applies here. The middle alternative needs one less
;; clobber than the final alternative, so we add "*?" as a counterweight.
@@ -1651,7 +1641,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "3")
(set_attr "mode" "SI")
- (set_attr "length" "4,4,8")])
+ (set_attr "insn_count" "1,1,2")])
;; Split *mul_acc_si if both the source and destination accumulator
;; values are GPRs.
@@ -1732,7 +1722,7 @@
""
[(set_attr "type" "imadd")
(set_attr "accum_in" "1")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
;; Patterns generated by the define_peephole2 below.
@@ -1868,7 +1858,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "1")
(set_attr "mode" "SI")
- (set_attr "length" "4,8")])
+ (set_attr "insn_count" "1,2")])
;; Split *mul_sub_si if both the source and destination accumulator
;; values are GPRs.
@@ -1949,7 +1939,7 @@
"mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set_attr "length" "12")])
+ (set_attr "insn_count" "3")])
(define_insn_and_split "<u>mulsidi3_64bit"
[(set (match_operand:DI 0 "register_operand" "=d")
@@ -1968,10 +1958,10 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "ISA_HAS_EXT_INS")
- (const_int 16)
- (const_int 28)))])
+ (const_int 4)
+ (const_int 7)))])
(define_expand "<u>mulsidi3_64bit_mips16"
[(set (match_operand:DI 0 "register_operand")
@@ -2035,7 +2025,7 @@
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))
(clobber (match_scratch:DI 3 "=l"))]
- "TARGET_64BIT && ISA_HAS_DMUL3"
+ "ISA_HAS_DMUL3"
"dmul\t%0,%1,%2"
[(set_attr "type" "imul3")
(set_attr "mode" "DI")])
@@ -2122,7 +2112,7 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_expand "<su>mulsi3_highpart_split"
[(set (match_operand:SI 0 "register_operand")
@@ -2189,7 +2179,7 @@
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
(any_extend:TI (match_operand:DI 2 "register_operand")))
(const_int 64))))]
- "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{
if (TARGET_MIPS16)
emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1],
@@ -2208,7 +2198,7 @@
(any_extend:TI (match_operand:DI 2 "register_operand" "d")))
(const_int 64))))
(clobber (match_scratch:DI 3 "=l"))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& !TARGET_MIPS16
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{ return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; }
@@ -2221,7 +2211,7 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "DI")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_expand "<su>muldi3_highpart_split"
[(set (match_operand:DI 0 "register_operand")
@@ -2244,7 +2234,7 @@
[(set (match_operand:TI 0 "register_operand")
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
(any_extend:TI (match_operand:DI 2 "register_operand"))))]
- "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{
rtx hilo;
@@ -2266,7 +2256,7 @@
[(set (match_operand:TI 0 "muldiv_target_operand" "=x")
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
(any_extend:TI (match_operand:DI 2 "register_operand" "d"))))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& !TARGET_FIX_R4000
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
"dmult<u>\t%1,%2"
@@ -2278,13 +2268,13 @@
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
(any_extend:TI (match_operand:DI 2 "register_operand" "d"))))
(clobber (match_scratch:TI 3 "=x"))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& TARGET_FIX_R4000
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
"dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
[(set_attr "type" "imul")
(set_attr "mode" "DI")
- (set_attr "length" "12")])
+ (set_attr "insn_count" "3")])
;; The R4650 supports a 32-bit multiply/ 64-bit accumulate
;; instruction. The HI/LO registers are used as a 64-bit accumulator.
@@ -2535,10 +2525,10 @@
}
[(set_attr "type" "fdiv")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*recip<mode>3"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2553,10 +2543,10 @@
}
[(set_attr "type" "frdiv")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
;; VR4120 errata MD(A1): signed division instructions do not work correctly
;; with negative operands. We use special libgcc functions instead.
@@ -2574,7 +2564,7 @@
(set (match_operand:GPR 3 "register_operand" "=d")
(mod:GPR (match_dup 1)
(match_dup 2)))]
- "!TARGET_FIX_VR4120"
+ "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120"
"#"
"&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)"
[(const_int 0)]
@@ -2586,7 +2576,8 @@
}
[(set_attr "type" "idiv")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ ;; Worst case for MIPS16.
+ (set_attr "insn_count" "3")])
;; See the comment above "divmod<mode>4" for the MIPS16 handling.
(define_insn_and_split "udivmod<mode>4"
@@ -2596,7 +2587,7 @@
(set (match_operand:GPR 3 "register_operand" "=d")
(umod:GPR (match_dup 1)
(match_dup 2)))]
- ""
+ "ISA_HAS_<D>DIV"
"#"
"(TARGET_MIPS16 && cse_not_expected) || reload_completed"
[(const_int 0)]
@@ -2606,9 +2597,10 @@
emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
DONE;
}
- [(set_attr "type" "idiv")
- (set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")
+ ;; Worst case for MIPS16.
+ (set_attr "insn_count" "3")])
(define_expand "<u>divmod<mode>4_split"
[(set (match_operand:GPR 0 "register_operand")
@@ -2641,7 +2633,7 @@
[(any_div:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d"))]
UNSPEC_SET_HILO))]
- ""
+ "ISA_HAS_<GPR:D>DIV"
{ return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); }
[(set_attr "type" "idiv")
(set_attr "mode" "<GPR:MODE>")])
@@ -2668,10 +2660,10 @@
}
[(set_attr "type" "fsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*rsqrt<mode>a"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2686,10 +2678,10 @@
}
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*rsqrt<mode>b"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2704,10 +2696,10 @@
}
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
;;
;; ....................
@@ -3500,7 +3492,7 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "DF")
(set_attr "cnv_mode" "D2I")
- (set_attr "length" "36")])
+ (set_attr "insn_count" "9")])
(define_expand "fix_truncsfsi2"
[(set (match_operand:SI 0 "register_operand")
@@ -3537,7 +3529,7 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "SF")
(set_attr "cnv_mode" "S2I")
- (set_attr "length" "36")])
+ (set_attr "insn_count" "9")])
(define_insn "fix_truncdfdi2"
@@ -4015,7 +4007,7 @@
operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID);
}
- [(set_attr "length" "20")])
+ [(set_attr "insn_count" "5")])
;; Use a scratch register to reduce the latency of the above pattern
;; on superscalar machines. The optimized sequence is:
@@ -4070,7 +4062,7 @@
operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW);
}
- [(set_attr "length" "24")])
+ [(set_attr "insn_count" "6")])
;; Split HIGHs into:
;;
@@ -5080,7 +5072,7 @@
return ".cprestore\t%1";
}
[(set_attr "type" "store")
- (set_attr "length" "4,12")])
+ (set_attr "insn_count" "1,3")])
(define_insn "use_cprestore_<mode>"
[(set (reg:P CPRESTORE_SLOT_REGNUM)
@@ -5141,7 +5133,7 @@
"\tjr.hb\t$31\n"
"\tnop%>%)";
}
- [(set_attr "length" "20")])
+ [(set_attr "insn_count" "5")])
;; Cache operations for R4000-style caches.
(define_insn "mips_cache"
@@ -5334,8 +5326,7 @@
;; not have an immediate). We recognize a shift of a load in order
;; to make it simple enough for combine to understand.
;;
-;; The length here is the worst case: the length of the split version
-;; will be more accurate.
+;; The instruction count here is the worst case.
(define_insn_and_split ""
[(set (match_operand:SI 0 "register_operand" "=d")
(lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
@@ -5348,7 +5339,8 @@
""
[(set_attr "type" "load")
(set_attr "mode" "SI")
- (set_attr "length" "8")])
+ (set (attr "insn_count")
+ (symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))])
(define_insn "rotr<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=d")
@@ -5987,7 +5979,7 @@
return "j\t%4";
}
- [(set_attr "length" "32")])
+ [(set_attr "insn_count" "16")])
;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well.
;; While it is possible to either pull it off the stack (in the
@@ -6878,11 +6870,8 @@
(set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
""
[(set_attr "type" "unknown")
- ; Since rdhwr always generates a trap for now, putting it in a delay
- ; slot would make the kernel's emulation of it much slower.
- (set_attr "can_delay" "no")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_insn "*tls_get_tp_<mode>_split"
[(set (reg:P TLS_GET_TP_REGNUM)
@@ -6890,7 +6879,8 @@
"HAVE_AS_TLS && !TARGET_MIPS16"
".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop"
[(set_attr "type" "unknown")
- ; See tls_get_tp_<mode>
+ ; Since rdhwr always generates a trap for now, putting it in a delay
+ ; slot would make the kernel's emulation of it much slower.
(set_attr "can_delay" "no")
(set_attr "mode" "<MODE>")])
@@ -6922,7 +6912,7 @@
(set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
""
[(set_attr "type" "multi")
- (set_attr "length" "8")
+ (set_attr "insn_count" "4")
(set_attr "mode" "<MODE>")])
(define_insn "*tls_get_tp_mips16_call_<mode>"
@@ -6934,7 +6924,7 @@
"HAVE_AS_TLS && TARGET_MIPS16"
{ return MIPS_CALL ("jal", operands, 0, -1); }
[(set_attr "type" "call")
- (set_attr "length" "6")
+ (set_attr "insn_count" "3")
(set_attr "mode" "<MODE>")])
;; Named pattern for expanding thread pointer reference.
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e11710db3c0..08ab29b1810 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -141,6 +141,10 @@ membedded-data
Target Report Var(TARGET_EMBEDDED_DATA)
Use ROM instead of RAM
+meva
+Target Report Var(TARGET_EVA)
+Use Enhanced Virtual Addressing instructions
+
mexplicit-relocs
Target Report Mask(EXPLICIT_RELOCS)
Use NewABI-style %reloc() assembly operators
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
index 1af09e559b0..bd37067dfc4 100644
--- a/gcc/config/mmix/mmix.c
+++ b/gcc/config/mmix/mmix.c
@@ -313,7 +313,7 @@ mmix_init_machine_status (void)
return ggc_alloc_cleared_machine_function ();
}
-/* DATA_ALIGNMENT.
+/* DATA_ABI_ALIGNMENT.
We have trouble getting the address of stuff that is located at other
than 32-bit alignments (GETA requirements), so try to give everything
at least 32-bit alignment. */
diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h
index 4ca1a2b8c86..c5edc5777a9 100644
--- a/gcc/config/mmix/mmix.h
+++ b/gcc/config/mmix/mmix.h
@@ -164,7 +164,7 @@ struct GTY(()) machine_function
/* Copied from elfos.h. */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
-#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \
+#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \
mmix_data_alignment (TYPE, BASIC_ALIGN)
#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 2e18bebf3d8..c2ed7389bc4 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace)
}
}
+/* Returns TRUE for valid pointer modes. */
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode
+static bool
+rl78_valid_pointer_mode (enum machine_mode m)
+{
+ return (m == HImode || m == SImode);
+}
+
/* Return the appropriate mode for a named address space address. */
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode
@@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
}
+
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode
+
+static enum machine_mode
+rl78_unwind_word_mode (void)
+{
+ return HImode;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rl78.h"
diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md
index b3cfe6d1bbc..efc26210498 100644
--- a/gcc/config/rl78/rl78.md
+++ b/gcc/config/rl78/rl78.md
@@ -235,6 +235,24 @@
[(set_attr "valloc" "macax")]
)
+(define_expand "mulqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI (match_operand:QI 1 "general_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ ]
+ "" ; mulu supported by all targets
+ ""
+)
+
+(define_expand "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "nonmemory_operand" "")))
+ ]
+ "! RL78_MUL_NONE"
+ ""
+)
+
(define_expand "mulsi3"
[(set (match_operand:SI 0 "register_operand" "=&v")
(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
@@ -244,6 +262,58 @@
""
)
+(define_insn "*mulqi3_rl78"
+ [(set (match_operand:QI 0 "register_operand" "=&v")
+ (mult:QI (match_operand:QI 1 "general_operand" "+viU")
+ (match_operand:QI 2 "general_operand" "vi")))
+ ]
+ "" ; mulu supported by all targets
+ "; mulqi macro %0 = %1 * %2
+ mov a, %h1
+ mov x, a
+ mov a, %h2
+ mulu x ; ax = a * x
+ mov a, x
+ mov %h0, a
+ ; end of mulqi macro"
+;; [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_rl78"
+ [(set (match_operand:HI 0 "register_operand" "=&v")
+ (mult:HI (match_operand:HI 1 "general_operand" "+viU")
+ (match_operand:HI 2 "general_operand" "vi")))
+ ]
+ "RL78_MUL_RL78"
+ "; mulhi macro %0 = %1 * %2
+ movw ax, %h1
+ movw bc, %h2
+ mulhu ; bcax = bc * ax
+ movw %h0, ax
+ ; end of mulhi macro"
+;; [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_g13"
+ [(set (match_operand:HI 0 "register_operand" "=&v")
+ (mult:HI (match_operand:HI 1 "general_operand" "+viU")
+ (match_operand:HI 2 "general_operand" "vi")))
+ ]
+ "RL78_MUL_G13"
+ "; mulhi macro %0 = %1 * %2
+ mov a, #0x00
+ mov !0xf00e8, a ; MDUC
+ movw ax, %h1
+ movw 0xffff0, ax ; MDAL
+ movw ax, %h2
+ movw 0xffff2, ax ; MDAH
+ nop ; mdb = mdal * mdah
+ movw ax, 0xffff6 ; MDBL
+ movw %h0, ax
+ ; end of mulhi macro"
+;; [(set_attr "valloc" "umul")]
+)
+
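For reference, these expanders open-code a plain 16-bit multiply; an illustrative source-level view (on RL78, int is 16 bits, so this is HImode and is matched by the mulhi3 expander above):

unsigned int
mulhi_demo (unsigned int a, unsigned int b)
{
  return a * b;   /* expands via mulhi3 -> *mulhi3_rl78 or *mulhi3_g13 */
}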
;; 0xFFFF0 is MACR(L). 0xFFFF2 is MACR(H) but we don't care about it
;; because we're only using the lower 16 bits (which is the upper 16
;; bits of the result).
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index 19a4ae9297d..4b91c5c5e24 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -323,15 +323,31 @@
#ifdef _ARCH_PWR8
/* Vector additions added in ISA 2.07. */
+#define vec_eqv __builtin_vec_eqv
+#define vec_nand __builtin_vec_nand
+#define vec_orc __builtin_vec_orc
#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vclz __builtin_vec_vclz
+#define vec_vclzb __builtin_vec_vclzb
+#define vec_vclzd __builtin_vec_vclzd
+#define vec_vclzh __builtin_vec_vclzh
+#define vec_vclzw __builtin_vec_vclzw
+#define vec_vgbbd __builtin_vec_vgbbd
#define vec_vmaxsd __builtin_vec_vmaxsd
#define vec_vmaxud __builtin_vec_vmaxud
#define vec_vminsd __builtin_vec_vminsd
#define vec_vminud __builtin_vec_vminud
+#define vec_vmrgew __builtin_vec_vmrgew
+#define vec_vmrgow __builtin_vec_vmrgow
#define vec_vpksdss __builtin_vec_vpksdss
#define vec_vpksdus __builtin_vec_vpksdus
#define vec_vpkudum __builtin_vec_vpkudum
#define vec_vpkudus __builtin_vec_vpkudus
+#define vec_vpopcnt __builtin_vec_vpopcnt
+#define vec_vpopcntb __builtin_vec_vpopcntb
+#define vec_vpopcntd __builtin_vec_vpopcntd
+#define vec_vpopcnth __builtin_vec_vpopcnth
+#define vec_vpopcntw __builtin_vec_vpopcntw
#define vec_vrld __builtin_vec_vrld
#define vec_vsld __builtin_vec_vsld
#define vec_vsrad __builtin_vec_vsrad
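Illustrative use of the new names (assumes compilation with -mcpu=power8; the per-operation semantics follow ISA 2.07: eqv is ~(a ^ b), nand is ~(a & b), orc is a | ~b):

#include <altivec.h>

vector unsigned int
p8_logic_demo (vector unsigned int a, vector unsigned int b)
{
  vector unsigned int x = vec_eqv (a, b);    /* ~(a ^ b) */
  vector unsigned int y = vec_nand (a, b);   /* ~(a & b) */
  vector unsigned int z = vec_orc (a, b);    /* a | ~b   */
  return vec_vpopcnt (x) + (y & z);          /* per-element popcount */
}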
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 78d29001440..6607e450be3 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -128,6 +128,7 @@
UNSPEC_VUPKLS_V4SF
UNSPEC_VUPKHU_V4SF
UNSPEC_VUPKLU_V4SF
+ UNSPEC_VGBBD
])
(define_c_enum "unspecv"
@@ -941,6 +942,31 @@
"vmrglw %0,%1,%2"
[(set_attr "type" "vecperm")])
+;; Power8 vector merge even/odd
+(define_insn "p8_vmrgew"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgew %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_vmrgow"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgow %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
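The vec_select indices spell out the semantics: with the two V4SI inputs concatenated as words 0..7, vmrgew selects 0,4,2,6 and vmrgow selects 1,5,3,7.  A scalar model, illustrative only:

static void
vmrgew_model (const unsigned a[4], const unsigned b[4], unsigned out[4])
{
  out[0] = a[0]; out[1] = b[0]; out[2] = a[2]; out[3] = b[2];
}

static void
vmrgow_model (const unsigned a[4], const unsigned b[4], unsigned out[4])
{
  out[0] = a[1]; out[1] = b[1]; out[2] = a[3]; out[3] = b[3];
}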
(define_insn "vec_widen_umult_even_v16qi"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1017,10 +1043,13 @@
;; logical ops. Have the logical ops follow the memory ops in
;; terms of whether to prefer VSX or Altivec
+;; AND has a clobber to be consistent with VSX, which adds splitters for using
+;; the GPR registers.
(define_insn "*altivec_and<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
(and:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
+ (match_operand:VM 2 "register_operand" "v")))
+ (clobber (match_scratch:CC 3 "=X"))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vand %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1050,8 +1079,8 @@
(define_insn "*altivec_nor<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v"))))]
+ (and:VM (not:VM (match_operand:VM 1 "register_operand" "v"))
+ (not:VM (match_operand:VM 2 "register_operand" "v"))))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vnor %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -2370,3 +2399,34 @@
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
DONE;
}")
+
+
+;; Power8 vector instructions encoded as Altivec instructions
+
+;; Vector count leading zeros
+(define_insn "*p8v_clz<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vclz<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector population count
+(define_insn "*p8v_popcount<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vpopcnt<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector Gather Bits by Bytes by Doubleword
+(define_insn "p8v_vgbbd"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VGBBD))]
+ "TARGET_P8_VECTOR"
+ "vgbbd %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c
index e608dce184c..1a173d0b1cc 100644
--- a/gcc/config/rs6000/driver-rs6000.c
+++ b/gcc/config/rs6000/driver-rs6000.c
@@ -167,7 +167,7 @@ elf_platform (void)
if (fd != -1)
{
- char buf[1024];
+ static char buf[1024];
ElfW(auxv_t) *av;
ssize_t n;
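Making buf static reads as a lifetime fix: elf_platform presumably returns a pointer derived from this buffer, which must therefore outlive the call.  A minimal sketch of the bug pattern (names illustrative, not the real driver code):

const char *
broken (void)
{
  char buf[64];
  return buf;           /* dangling: buf dies with the stack frame */
}

const char *
fixed (void)
{
  static char buf[64];
  return buf;           /* static storage persists after return */
}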
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 3f280581feb..79f0f0b5f00 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -136,8 +136,11 @@ extern int dot_symbols;
SET_CMODEL (CMODEL_MEDIUM); \
if (rs6000_current_cmodel != CMODEL_SMALL) \
{ \
- TARGET_NO_FP_IN_TOC = 0; \
- TARGET_NO_SUM_IN_TOC = 0; \
+ if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \
+ TARGET_NO_FP_IN_TOC \
+ = rs6000_current_cmodel == CMODEL_MEDIUM; \
+ if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \
+ TARGET_NO_SUM_IN_TOC = 0; \
} \
} \
} \
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 078c9387350..12a602b78c4 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -207,7 +207,7 @@
if (!REG_P (op))
return 0;
- if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
return 1;
return INT_REGNO_P (REGNO (op));
@@ -1121,9 +1121,16 @@
GET_MODE (XEXP (op, 0))),
1"))))
+;; Return 1 if OP is a valid comparison operator for "cbranch" instructions.
+;; If we're assuming that FP operations cannot generate user-visible traps,
+;; then on e500 we can use the ordered-signaling instructions to implement
+;; the unordered-quiet FP comparison predicates modulo a reversal.
(define_predicate "rs6000_cbranch_operator"
(if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS")
- (match_operand 0 "ordered_comparison_operator")
+ (if_then_else (match_test "flag_trapping_math")
+ (match_operand 0 "ordered_comparison_operator")
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (match_code ("unlt,unle,ungt,unge"))))
(match_operand 0 "comparison_operator")))
;; Return 1 if OP is a comparison operation that is valid for an SCC insn --
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index 7a80eff8924..1a5a709751d 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -1234,10 +1234,23 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw")
BU_VSX_OVERLOAD_X (LD, "ld")
BU_VSX_OVERLOAD_X (ST, "st")
+/* 1 argument VSX instructions added in ISA 2.07. */
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
+
/* 1 argument altivec instructions added in ISA 2.07. */
BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
/* 2 argument altivec instructions added in ISA 2.07. */
BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
@@ -1245,6 +1258,8 @@ BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
@@ -1255,6 +1270,27 @@ BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
+
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
+
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+
/* Vector comparison instructions added in ISA 2.07. */
BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
@@ -1268,13 +1304,29 @@ BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
/* ISA 2.07 vector overloaded 1 argument functions. */
BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
/* ISA 2.07 vector overloaded 2 argument functions. */
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
+BU_P8V_OVERLOAD_2 (NAND, "nand")
+BU_P8V_OVERLOAD_2 (ORC, "orc")
BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index 801b5bb225d..593b772ebd1 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -3515,6 +3515,404 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ /* Power8 vector overloaded functions. */
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+  { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, 0, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
+
/* Crypto builtins. */
{ CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
@@ -3822,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
TREE_USED (decl) = 1;
TREE_TYPE (decl) = arg1_type;
TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
- DECL_INITIAL (decl) = arg1;
- stmt = build1 (DECL_EXPR, arg1_type, decl);
- TREE_ADDRESSABLE (decl) = 1;
- SET_EXPR_LOCATION (stmt, loc);
- stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ if (c_dialect_cxx ())
+ {
+ stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+ NULL_TREE, NULL_TREE);
+ SET_EXPR_LOCATION (stmt, loc);
+ }
+ else
+ {
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ }
innerptrtype = build_pointer_type (arg1_inner_type);
@@ -3901,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
TREE_USED (decl) = 1;
TREE_TYPE (decl) = arg1_type;
TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
- DECL_INITIAL (decl) = arg1;
- stmt = build1 (DECL_EXPR, arg1_type, decl);
- TREE_ADDRESSABLE (decl) = 1;
- SET_EXPR_LOCATION (stmt, loc);
- stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ if (c_dialect_cxx ())
+ {
+ stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+ NULL_TREE, NULL_TREE);
+ SET_EXPR_LOCATION (stmt, loc);
+ }
+ else
+ {
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ }
innerptrtype = build_pointer_type (arg1_inner_type);
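Both hunks in the resolver make the same substitution: a compound literal is a C-only construct, so for C++ the temporary is now materialized with a TARGET_EXPR instead of DECL_EXPR plus COMPOUND_LITERAL_EXPR.  A minimal reminder of the language difference (valid C99, rejected by ISO C++):

int
sum3 (void)
{
  int *p = (int[]){ 1, 2, 3 };   /* compound literal: C99 only */
  return p[0] + p[1] + p[2];
}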
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index e143a4ca203..d528a4fd87a 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -30,21 +30,22 @@
/* Processor type. Order must match cpu attribute in MD file. */
enum processor_type
{
- PROCESSOR_RS64A,
- PROCESSOR_MPCCORE,
- PROCESSOR_PPC403,
- PROCESSOR_PPC405,
- PROCESSOR_PPC440,
- PROCESSOR_PPC476,
PROCESSOR_PPC601,
PROCESSOR_PPC603,
PROCESSOR_PPC604,
PROCESSOR_PPC604e,
PROCESSOR_PPC620,
PROCESSOR_PPC630,
+
PROCESSOR_PPC750,
PROCESSOR_PPC7400,
PROCESSOR_PPC7450,
+
+ PROCESSOR_PPC403,
+ PROCESSOR_PPC405,
+ PROCESSOR_PPC440,
+ PROCESSOR_PPC476,
+
PROCESSOR_PPC8540,
PROCESSOR_PPC8548,
PROCESSOR_PPCE300C2,
@@ -53,16 +54,21 @@ enum processor_type
PROCESSOR_PPCE500MC64,
PROCESSOR_PPCE5500,
PROCESSOR_PPCE6500,
+
PROCESSOR_POWER4,
PROCESSOR_POWER5,
PROCESSOR_POWER6,
PROCESSOR_POWER7,
+ PROCESSOR_POWER8,
+
+ PROCESSOR_RS64A,
+ PROCESSOR_MPCCORE,
PROCESSOR_CELL,
PROCESSOR_PPCA2,
- PROCESSOR_TITAN,
- PROCESSOR_POWER8
+ PROCESSOR_TITAN
};
+
/* FP processor type. */
enum fpu_type_t
{
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index d9bcf1a41ed..02836ecea6d 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_128bit (rtx *);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
@@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
+extern bool direct_move_p (rtx, rtx);
+extern bool quad_load_store_p (rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index cb6876051d7..55273ab81bd 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx);
don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
+/* Simplify register classes into simpler classifications.  We assume
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
+ check for standard register classes (gpr/floating/altivec/vsx) and
+ floating/vector classes (float/altivec/vsx). */
+
+enum rs6000_reg_type {
+ NO_REG_TYPE,
+ PSEUDO_REG_TYPE,
+ GPR_REG_TYPE,
+ VSX_REG_TYPE,
+ ALTIVEC_REG_TYPE,
+ FPR_REG_TYPE,
+ SPR_REG_TYPE,
+ CR_REG_TYPE,
+ SPE_ACC_TYPE,
+ SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type. */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+ purpose, floating point, altivec, and VSX registers). */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
+
+/* Direct moves to/from vsx/gpr registers that need an additional register to
+ do the move. */
+static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES];
+static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES];
+static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES];
+
/* Target cpu costs. */
@@ -1042,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *,
static void rs6000_print_builtin_options (FILE *, int, const char *,
HOST_WIDE_INT);
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+ enum rs6000_reg_type,
+ enum machine_mode,
+ secondary_reload_info *,
+ bool);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY(()) toc_hash_struct
@@ -1587,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
return ALTIVEC_REGNO_P (last_regno);
}
- /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode
- can only go in GPRs. */
+ /* Allow TImode in all VSX registers if the user asked for it. */
if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno))
return 1;
@@ -2154,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
+  /* Precalculate the mapping from register class to the simpler reload
+     register type.  We don't
+ need all of the register classes that are combinations of different
+ classes, just the simple ones that have constraint letters. */
+ for (c = 0; c < N_REG_CLASSES; c++)
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
+ if (TARGET_VSX)
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
+ }
+ else
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+ }
+
/* Precalculate vector information, this must be set up before the
rs6000_hard_regno_nregs_internal below. */
for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2305,7 +2374,15 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_LFIWZX)
rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
- /* Set up the reload helper functions. */
+ /* Setup the direct move combinations. */
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ reload_fpr_gpr[m] = CODE_FOR_nothing;
+ reload_gpr_vsx[m] = CODE_FOR_nothing;
+ reload_vsx_gpr[m] = CODE_FOR_nothing;
+ }
+
+ /* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
if (TARGET_64BIT)
@@ -2329,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store;
rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load;
}
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (TARGET_POWERPC64)
+ {
+ reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti;
+ reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df;
+ reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di;
+ reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si;
+ reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti;
+ reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df;
+ reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di;
+ reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf;
+ reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si;
+ reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi;
+ reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi;
+ reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf;
+ }
+ else
+ {
+ reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi;
+ reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd;
+ reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf;
+ }
+ }
}
else
{
@@ -2356,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store;
@@ -2916,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
}
+  /* The quad memory instructions only work in 64-bit mode.  In 32-bit mode,
+ silently turn off quad memory mode. */
+ if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@@ -3042,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p)
/* Place FP constants in the constant pool instead of TOC
if section anchors enabled. */
- if (flag_section_anchors)
+ if (flag_section_anchors
+ && !global_options_set.x_TARGET_NO_FP_IN_TOC)
TARGET_NO_FP_IN_TOC = 1;
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
@@ -4082,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
switch (fn)
{
+ case BUILT_IN_CLZIMAX:
+ case BUILT_IN_CLZLL:
+ case BUILT_IN_CLZL:
+ case BUILT_IN_CLZ:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+ }
+ break;
case BUILT_IN_COPYSIGN:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -4097,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
break;
+ case BUILT_IN_POPCOUNTIMAX:
+ case BUILT_IN_POPCOUNTLL:
+ case BUILT_IN_POPCOUNTL:
+ case BUILT_IN_POPCOUNT:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+ }
+ break;
case BUILT_IN_SQRT:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
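As a hypothetical illustration of what the new hook entries enable (this example is not part of the patch): with -mcpu=power8 -O3, the vectorizer can now replace the scalar builtin in a loop like the one below with vpopcntw on V4SImode data, per the mapping above; the clz cases map analogously to vclzb/vclzh/vclzw/vclzd.

/* Hypothetical user code, not from the patch.  */
unsigned int
popcount_words (const unsigned int *a, int n)
{
  unsigned int sum = 0;
  for (int i = 0; i < n; i++)
    sum += __builtin_popcount (a[i]);  /* may vectorize to vpopcntw on power8  */
  return sum;
}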
@@ -4955,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals)
{
rtx freg = gen_reg_rtx (V4SFmode);
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx cvt = ((TARGET_XSCVDPSPN)
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+ emit_insn (cvt);
emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
}
else
@@ -5339,6 +5505,72 @@ gpr_or_gpr_p (rtx op0, rtx op1)
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
+/* Return true if this is a move direct operation between GPR registers and
+ floating point/VSX registers. */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+ int regno0, regno1;
+
+ if (!REG_P (op0) || !REG_P (op1))
+ return false;
+
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+ return false;
+
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+ return false;
+
+ if (INT_REGNO_P (regno0))
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+ else if (INT_REGNO_P (regno1))
+ {
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+ return true;
+
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if this is a load or store quad operation. */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+ bool ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = false;
+
+ else if (REG_P (op0) && MEM_P (op1))
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+ && quad_memory_operand (op1, GET_MODE (op1))
+ && !reg_overlap_mentioned_p (op0, op1));
+
+ else if (MEM_P (op0) && REG_P (op1))
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+ else
+ ret = false;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
+ ret ? "true" : "false");
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+ }
+
+ return ret;
+}
+
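A hypothetical usage sketch (not from the patch) of what quad_load_store_p gates: on a 64-bit power8 with -mquad-memory, a 128-bit copy such as the one below is a candidate for a single lq/stq pair, provided the operands satisfy quad_int_reg_operand and quad_memory_operand as tested above.

/* Hypothetical user code, not from the patch.  */
void
copy_int128 (__int128 *dst, const __int128 *src)
{
  *dst = *src;  /* may become lq/stq with -mcpu=power8 -mquad-memory  */
}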
/* Given an address, return a constant offset term if one exists. */
static rtx
@@ -5474,91 +5706,102 @@ virtual_stack_registers_memory_p (rtx op)
&& regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
-/* Return true if memory accesses to OP are known to never straddle
- a 32k boundary. */
+/* Return true if a MODE-sized memory access to OP plus OFFSET
+   is known not to straddle a 32k boundary.  */
static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
enum machine_mode mode)
{
tree decl, type;
- unsigned HOST_WIDE_INT dsize, dalign;
+ unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
if (GET_CODE (op) != SYMBOL_REF)
return false;
+ dsize = GET_MODE_SIZE (mode);
decl = SYMBOL_REF_DECL (op);
if (!decl)
{
- if (GET_MODE_SIZE (mode) == 0)
+ if (dsize == 0)
return false;
/* -fsection-anchors loses the original SYMBOL_REF_DECL when
replacing memory addresses with an anchor plus offset. We
could find the decl by rummaging around in the block->objects
VEC for the given offset but that seems like too much work. */
- dalign = 1;
+ dalign = BITS_PER_UNIT;
if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
&& SYMBOL_REF_ANCHOR_P (op)
&& SYMBOL_REF_BLOCK (op) != NULL)
{
struct object_block *block = SYMBOL_REF_BLOCK (op);
- HOST_WIDE_INT lsb, mask;
- /* Given the alignment of the block.. */
dalign = block->alignment;
- mask = dalign / BITS_PER_UNIT - 1;
-
- /* ..and the combined offset of the anchor and any offset
- to this block object.. */
offset += SYMBOL_REF_BLOCK_OFFSET (op);
- lsb = offset & -offset;
+ }
+ else if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ /* It would be nice to have get_pool_align().. */
+ enum machine_mode cmode = get_pool_mode (op);
- /* ..find how many bits of the alignment we know for the
- object. */
- mask &= lsb - 1;
- dalign = mask + 1;
+ dalign = GET_MODE_ALIGNMENT (cmode);
}
- return dalign >= GET_MODE_SIZE (mode);
}
-
- if (DECL_P (decl))
+ else if (DECL_P (decl))
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
- return true;
+ dalign = DECL_ALIGN (decl);
- if (!DECL_SIZE_UNIT (decl))
- return false;
+ if (dsize == 0)
+ {
+ /* Allow BLKmode when the entire object is known to not
+ cross a 32k boundary. */
+ if (!DECL_SIZE_UNIT (decl))
+ return false;
- if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
- return false;
+ if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
+ return false;
- dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
- if (dsize > 32768)
- return false;
+ dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+ if (dsize > 32768)
+ return false;
- dalign = DECL_ALIGN_UNIT (decl);
- return dalign >= dsize;
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
}
+ else
+ {
+ type = TREE_TYPE (decl);
- type = TREE_TYPE (decl);
+ dalign = TYPE_ALIGN (type);
+ if (CONSTANT_CLASS_P (decl))
+ dalign = CONSTANT_ALIGNMENT (decl, dalign);
+ else
+ dalign = DATA_ALIGNMENT (decl, dalign);
- if (TREE_CODE (decl) == STRING_CST)
- dsize = TREE_STRING_LENGTH (decl);
- else if (TYPE_SIZE_UNIT (type)
- && host_integerp (TYPE_SIZE_UNIT (type), 1))
- dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
- else
- return false;
- if (dsize > 32768)
- return false;
+ if (dsize == 0)
+ {
+ /* BLKmode, check the entire object. */
+ if (TREE_CODE (decl) == STRING_CST)
+ dsize = TREE_STRING_LENGTH (decl);
+ else if (TYPE_SIZE_UNIT (type)
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ else
+ return false;
+ if (dsize > 32768)
+ return false;
+
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
+ }
+
+ /* Find how many bits of the alignment we know for this access. */
+ mask = dalign / BITS_PER_UNIT - 1;
+ lsb = offset & -offset;
+ mask &= lsb - 1;
+ dalign = mask + 1;
- dalign = TYPE_ALIGN (type);
- if (CONSTANT_CLASS_P (decl))
- dalign = CONSTANT_ALIGNMENT (decl, dalign);
- else
- dalign = DATA_ALIGNMENT (decl, dalign);
- dalign /= BITS_PER_UNIT;
return dalign >= dsize;
}
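An illustrative restatement (not from the patch) of the final alignment computation above: the alignment known for SYMBOL+OFFSET is the symbol's alignment capped by the lowest set bit of the offset.

/* Sketch of the mask/lsb arithmetic; e.g. dalign 16, offset 40 -> 8.  */
static unsigned long
known_access_alignment (unsigned long dalign, unsigned long offset)
{
  unsigned long mask = dalign - 1;       /* 16 -> 0xf  */
  unsigned long lsb = offset & -offset;  /* 40 -> 8, the lowest set bit  */
  mask &= lsb - 1;                       /* 0xf & 0x7 -> 0x7  */
  return mask + 1;                       /* 8 bytes of alignment remain known  */
}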
@@ -5846,8 +6089,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
return force_reg (Pmode, XEXP (x, 0));
+ /* For TImode with load/store quad, restrict addresses to just a single
+ pointer, so it works with both GPRs and VSX registers. */
/* Make sure both operands are registers. */
- else if (GET_CODE (x) == PLUS)
+ else if (GET_CODE (x) == PLUS
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
return gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
force_reg (Pmode, XEXP (x, 1)));
@@ -6504,7 +6750,6 @@ use_toc_relative_ref (rtx sym)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
get_pool_mode (sym)))
|| (TARGET_CMODEL == CMODEL_MEDIUM
- && !CONSTANT_POOL_ADDRESS_P (sym)
&& SYMBOL_REF_LOCAL_P (sym)));
}
@@ -6802,6 +7047,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (reg_offset_p
&& legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
return 1;
+ /* For TImode, if we have load/store quad, only allow register indirect
+ addresses. This will allow the values to go in either GPRs or VSX
+ registers without reloading. The vector types would tend to go into VSX
+ registers, so we allow REG+REG, while TImode seems somewhat split, in that
+ some uses are GPR based, and some VSX based. */
+ if (mode == TImode && TARGET_QUAD_MEMORY)
+ return 0;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
&& reg_offset_p
@@ -9314,20 +9566,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
&& cfun->va_list_gpr_size)
{
- int nregs = GP_ARG_NUM_REG - first_reg_offset;
+ int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
if (va_list_gpr_counter_field)
- {
- /* V4 va_list_gpr_size counts number of registers needed. */
- if (nregs > cfun->va_list_gpr_size)
- nregs = cfun->va_list_gpr_size;
- }
+ /* V4 va_list_gpr_size counts number of registers needed. */
+ n_gpr = cfun->va_list_gpr_size;
else
- {
- /* char * va_list instead counts number of bytes needed. */
- if (nregs > cfun->va_list_gpr_size / reg_size)
- nregs = cfun->va_list_gpr_size / reg_size;
- }
+ /* char * va_list instead counts number of bytes needed. */
+ n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
+
+ if (nregs > n_gpr)
+ nregs = n_gpr;
mem = gen_rtx_MEM (BLKmode,
plus_constant (Pmode, save_area,
@@ -12857,6 +13106,7 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
{
/* unsigned 1 argument functions. */
case CRYPTO_BUILTIN_VSBOX:
+ case P8V_BUILTIN_VGBBD:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
break;
@@ -13947,29 +14197,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
-enum reload_reg_type {
- GPR_REGISTER_TYPE,
- VECTOR_REGISTER_TYPE,
- OTHER_REGISTER_TYPE
-};
+/* Classify a register type.  Because the FMRGOW/FMRGEW instructions only work
+   on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+   only work on the traditional altivec registers, note if an altivec register
+   was chosen.  */
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
{
- switch (rclass)
+ HOST_WIDE_INT regno;
+ enum reg_class rclass;
+
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (!REG_P (reg))
+ return NO_REG_TYPE;
+
+ regno = REGNO (reg);
+ if (regno >= FIRST_PSEUDO_REGISTER)
{
- case GENERAL_REGS:
- case BASE_REGS:
- return GPR_REGISTER_TYPE;
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
+ return PSEUDO_REG_TYPE;
- case FLOAT_REGS:
- case ALTIVEC_REGS:
- case VSX_REGS:
- return VECTOR_REGISTER_TYPE;
+ regno = true_regnum (reg);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ return PSEUDO_REG_TYPE;
+ }
- default:
- return OTHER_REGISTER_TYPE;
+ gcc_assert (regno >= 0);
+
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
+ *is_altivec = true;
+
+ rclass = rs6000_regno_regclass[regno];
+ return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+   different register class is really a simple move.  */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode)
+{
+ int size;
+
+ /* Add support for various direct moves available. In this function, we only
+ look at cases where we don't need any extra registers, and one or more
+ simple move insns are issued. At present, 32-bit integers are not allowed
+   in FPR/VSX registers.  Single precision binary floating point is not a
+   simple move because we need to convert to the single precision memory
+   layout.  The 4-byte SDmode can be moved.  */
+ size = GET_MODE_SIZE (mode);
+ if (TARGET_DIRECT_MOVE
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ return false;
+}
+
+/* Power8 helper function for rs6000_secondary_reload.  Handle all of the
+   special direct moves that involve allocating an extra register.  Return
+   true if such a helper exists, recording its insn code and extra cost in
+   SRI; return false otherwise.  */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ bool ret = false;
+ enum insn_code icode = CODE_FOR_nothing;
+ int cost = 0;
+ int size = GET_MODE_SIZE (mode);
+
+ if (TARGET_POWERPC64)
+ {
+ if (size == 16)
+ {
+	  /* Handle moving 128-bit values from GPRs to VSX registers on
+	     power8 when running in 64-bit mode, using XXPERMDI to glue the
+	     two 64-bit values back together.  */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+
+	  /* Handle moving 128-bit values from VSX registers to GPRs on
+	     power8 when running in 64-bit mode, using XXPERMDI to get access
+	     to the bottom 64-bit value.  */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+ }
+
+ else if (mode == SFmode)
+ {
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 2; /* mtvsrz, xscvspdpn. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+ }
}
+
+ else if (!TARGET_POWERPC64 && size == 8)
+ {
+ /* Handle moving 64-bit values from GPRs to floating point registers on
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+ values back together. Altivec register classes must be handled
+ specially since a different instruction is used, and the secondary
+ reload support requires a single instruction class in the scratch
+ register constraint. However, right now TFmode is not allowed in
+ Altivec registers, so the pattern will never match. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+ {
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
+ icode = reload_fpr_gpr[(int)mode];
+ }
+ }
+
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = true;
+ if (sri)
+ {
+ sri->icode = icode;
+ sri->extra_cost = cost;
+ }
+ }
+
+ return ret;
+}
+
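A hypothetical example (not part of the patch) of code that exercises the sequences costed above: on a 64-bit power8, building a V2DI vector from two doublewords held in GPRs corresponds to the reload_vsx_from_gpr* patterns, roughly two mtvsrd moves plus one xxpermdi to glue the halves together; the reverse direction uses two mfvsrd moves and one xxpermdi. The example assumes the AltiVec vector keyword is enabled (-maltivec or -mvsx).

/* Hypothetical user code, not from the patch.  */
vector long long
combine_doublewords (long long hi, long long lo)
{
  return (vector long long) { hi, lo };  /* mtvsrd, mtvsrd, xxpermdi  */
}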
+/* Return whether a move between two register classes can be done either
+ directly (simple move) or via a pattern that uses a single extra temporary
+   (using power8's direct move in this case).  */
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ /* Fall back to load/store reloads if either type is not a register. */
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+ return false;
+
+ /* If we haven't allocated registers yet, assume the move can be done for the
+ standard register types. */
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+ return true;
+
+  /* A move within the same set of registers is a simple move for
+     non-specialized registers.  */
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+ return true;
+
+ /* Check whether a simple move can be done directly. */
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+ {
+ if (sri)
+ {
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+ }
+ return true;
+ }
+
+ /* Now check if we can do it in a few steps. */
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+ altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
@@ -13995,11 +14442,32 @@ rs6000_secondary_reload (bool in_p,
bool default_p = false;
sri->icode = CODE_FOR_nothing;
-
- /* Convert vector loads and stores into gprs to use an additional base
- register. */
icode = rs6000_vector_reload[mode][in_p != false];
- if (icode != CODE_FOR_nothing)
+
+ if (REG_P (x) || register_operand (x, mode))
+ {
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+ bool altivec_p = (rclass == ALTIVEC_REGS);
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+ if (!in_p)
+ {
+ enum rs6000_reg_type exchange = to_type;
+ to_type = from_type;
+ from_type = exchange;
+ }
+
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+ altivec_p))
+ {
+ icode = (enum insn_code)sri->icode;
+ default_p = false;
+ ret = NO_REGS;
+ }
+ }
+
+ /* Handle vector moves with reload helper functions. */
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
{
ret = NO_REGS;
sri->icode = CODE_FOR_nothing;
@@ -14011,12 +14479,21 @@ rs6000_secondary_reload (bool in_p,
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). */
+	 register if the addressing is reg+reg or (reg+reg)&(-16).  Special-case
+	 load/store quad.  */
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
{
- if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true))
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+ && GET_MODE_SIZE (mode) == 16
+ && quad_memory_operand (x, mode))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
+ false, true))
{
sri->icode = icode;
/* account for splitting the loads, and converting the
@@ -14030,7 +14507,7 @@ rs6000_secondary_reload (bool in_p,
else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
&& (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
&& (legitimate_indirect_address_p (addr, false)
- || legitimate_indirect_address_p (XEXP (addr, 0), false)
|| rs6000_legitimate_offset_address_p (mode, addr,
false, true)))
@@ -14082,12 +14559,12 @@ rs6000_secondary_reload (bool in_p,
else
{
enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
/* If memory is needed, use default_secondary_reload to create the
stack slot. */
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
default_p = true;
else
ret = NO_REGS;
@@ -14097,7 +14574,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
@@ -14136,7 +14613,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (!TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
@@ -14699,42 +15176,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
set and vice versa. */
static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- if (class1 == class2)
- return false;
-
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
- between these classes. But we need memory for other things that can go in
- FLOAT_REGS like SFmode. */
- if (TARGET_VSX
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
- || class1 == FLOAT_REGS))
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
- && class2 != FLOAT_REGS);
+ enum rs6000_reg_type from_type, to_type;
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
+ || (to_class == ALTIVEC_REGS));
- if (class1 == VSX_REGS || class2 == VSX_REGS)
- return true;
-
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+  /* If a simple/direct move is available, we don't need secondary memory.  */
+ from_type = reg_class_to_reg_type[(int)from_class];
+ to_type = reg_class_to_reg_type[(int)to_class];
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
+ (secondary_reload_info *)0, altivec_p))
+ return false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ /* If we have a floating point or vector register class, we need to use
+ memory to transfer the data. */
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
return true;
return false;
@@ -14742,17 +15202,19 @@ rs6000_secondary_memory_needed (enum reg_class class1,
/* Debug version of rs6000_secondary_memory_needed. */
static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
fprintf (stderr,
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
- "class2 = %s, mode = %s\n",
- ret ? "true" : "false", reg_class_names[class1],
- reg_class_names[class2], GET_MODE_NAME (mode));
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+ "to_class = %s, mode = %s\n",
+ ret ? "true" : "false",
+ reg_class_names[from_class],
+ reg_class_names[to_class],
+ GET_MODE_NAME (mode));
return ret;
}
@@ -14958,6 +15420,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from,
return ret;
}
+/* Return a string to do a move operation of 128 bits of data. */
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p;
+
+ if (REG_P (dest))
+ {
+ dest_regno = REGNO (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_av_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_av_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src))
+ {
+ src_regno = REGNO (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_av_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_av_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false;
+ }
+
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ if (dest_gpr_p)
+ {
+ if (src_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p && src_av_p)
+ return "vor %0,%1,%1";
+
+ else if (dest_fp_p && src_fp_p)
+ return "#";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
+ && quad_memory_operand (src, mode)
+ && !reg_overlap_mentioned_p (dest, src))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "lvx %0,%y1";
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "lxvw4x %x0,%y1";
+ else
+ return "lxvd2x %x0,%y1";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return "lvx %0,%y1";
+
+ else if (dest_fp_p)
+ return "#";
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
+ && quad_memory_operand (dest, mode))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p
+	   && altivec_indexed_or_indirect_operand (dest, mode))
+ return "stvx %1,%y0";
+
+ else if (TARGET_VSX && src_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "stxvw4x %x1,%y0";
+ else
+ return "stxvd2x %x1,%y0";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p)
+ return "stvx %1,%y0";
+
+ else if (src_fp_p)
+ return "#";
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0
+ && (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE
+ || GET_CODE (src) == CONST_VECTOR))
+ {
+ if (dest_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return output_vec_const_move (operands);
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+ }
+
+ gcc_unreachable ();
+}
+
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -15674,11 +16300,6 @@ print_operand (FILE *file, rtx x, int code)
TOCs and the like. */
gcc_assert (GET_CODE (x) == SYMBOL_REF);
- /* Mark the decl as referenced so that cgraph will output the
- function. */
- if (SYMBOL_REF_DECL (x))
- mark_decl_referenced (SYMBOL_REF_DECL (x));
-
/* For macho, check to see if we need a stub. */
if (TARGET_MACHO)
{
@@ -16087,16 +16708,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
{
rtx cmp, or_result, compare_result2;
enum machine_mode op_mode = GET_MODE (op0);
+ bool reverse_p;
if (op_mode == VOIDmode)
op_mode = GET_MODE (op1);
+ /* First reverse the condition codes that aren't directly supported. */
+ switch (code)
+ {
+ case NE:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ code = reverse_condition_maybe_unordered (code);
+ reverse_p = true;
+ break;
+
+ case EQ:
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ reverse_p = false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
/* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
This explains the following mess. */
switch (code)
{
- case EQ: case UNEQ: case NE: case LTGT:
+ case EQ:
switch (op_mode)
{
case SFmode:
@@ -16122,7 +16768,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
}
break;
- case GT: case GTU: case UNGT: case UNGE: case GE: case GEU:
+ case GT:
+ case GE:
switch (op_mode)
{
case SFmode:
@@ -16148,7 +16795,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
}
break;
- case LT: case LTU: case UNLT: case UNLE: case LE: case LEU:
+ case LT:
+ case LE:
switch (op_mode)
{
case SFmode:
@@ -16173,24 +16821,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
gcc_unreachable ();
}
break;
+
default:
gcc_unreachable ();
}
/* Synthesize LE and GE from LT/GT || EQ. */
- if (code == LE || code == GE || code == LEU || code == GEU)
+ if (code == LE || code == GE)
{
emit_insn (cmp);
- switch (code)
- {
- case LE: code = LT; break;
- case GE: code = GT; break;
- case LEU: code = LT; break;
- case GEU: code = GT; break;
- default: gcc_unreachable ();
- }
-
compare_result2 = gen_reg_rtx (CCFPmode);
/* Do the EQ. */
@@ -16217,23 +16857,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
default:
gcc_unreachable ();
}
+
emit_insn (cmp);
/* OR them together. */
or_result = gen_reg_rtx (CCFPmode);
cmp = gen_e500_cr_ior_compare (or_result, compare_result,
- compare_result2);
+ compare_result2);
compare_result = or_result;
- code = EQ;
- }
- else
- {
- if (code == NE || code == LTGT)
- code = NE;
- else
- code = EQ;
}
+ code = reverse_p ? NE : EQ;
+
emit_insn (cmp);
}
else
@@ -17113,7 +17748,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+   instruction in MODE.  For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
@@ -17122,12 +17758,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
switch (mode)
{
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
- fn = gen_load_lockedsi;
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
+ fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
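A hypothetical user-level example (not from the patch) that reaches the new QImode case: with -mcpu=power8 the retry loop can use the new lbarx/stbcx. pair directly, while on earlier CPUs the byte access is widened to a lwarx/stwcx. on the containing word with shift/mask fixups.

/* Hypothetical user code, not from the patch.  */
unsigned char
fetch_add_byte (unsigned char *p, unsigned char v)
{
  return __atomic_fetch_add (p, v, __ATOMIC_SEQ_CST);
}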
@@ -17144,12 +17794,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -17246,8 +17905,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
shift = gen_reg_rtx (SImode);
addr = gen_lowpart (SImode, addr);
emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
- shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
- shift, 1, OPTAB_LIB_WIDEN);
+ if (WORDS_BIG_ENDIAN)
+ shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
+ shift, 1, OPTAB_LIB_WIDEN);
*pshift = shift;
/* Mask for insertion. */
@@ -17295,7 +17955,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -17307,22 +17967,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask OLDVAL into position with the word. */
+      /* Before power8, we didn't have access to lbarx/lharx, so generate
+	 lwarx and shift/mask operations.  With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
oldval = convert_modes (SImode, mode, oldval, 1);
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- /* Shift and mask NEWVAL into position within the word. */
- newval = convert_modes (SImode, mode, newval, 1);
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (!TARGET_SYNC_HI_QI)
+ {
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
+
+ /* Shift and mask OLDVAL into position with the word. */
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+
+ /* Shift and mask NEWVAL into position within the word. */
+ newval = convert_modes (SImode, mode, newval, 1);
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
@@ -17351,7 +18018,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
}
cond = gen_reg_rtx (CCmode);
- x = gen_rtx_COMPARE (CCmode, x, oldval);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
@@ -17361,7 +18046,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -17379,6 +18064,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
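An illustrative C equivalent (not from the patch) of the synthesized TImode comparison above: since there is no 128-bit compare instruction, equality of the fetched and expected values is computed by XORing each 64-bit half and ORing the results together before the compare against zero.

/* Sketch of the xor/xor/or sequence emitted above.  */
static int
ti_words_equal (unsigned long long new0, unsigned long long new1,
                unsigned long long old0, unsigned long long old1)
{
  return ((new0 ^ old0) | (new1 ^ old1)) == 0;
}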
@@ -17402,7 +18089,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -17451,53 +18138,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+  /* On power8, we want to use SImode for the operation.  On previous systems,
+     do the operation on the containing word and shift/mask the result to get
+     the proper byte or halfword.  */
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask VAL into position with the word. */
- val = convert_modes (SImode, mode, val, 1);
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
- switch (code)
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
{
- case IOR:
- case XOR:
- /* We've already zero-extended VAL. That is sufficient to
- make certain that it does not affect other bits. */
- mask = NULL;
- break;
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
- case AND:
- /* If we make certain that all of the other bits in VAL are
- set, that will be sufficient to not affect other bits. */
- x = gen_rtx_NOT (SImode, mask);
- x = gen_rtx_IOR (SImode, x, val);
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
- mask = NULL;
- break;
+ /* Shift and mask VAL into position with the word. */
+ val = convert_modes (SImode, mode, val, 1);
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
- case NOT:
- case PLUS:
- case MINUS:
- /* These will all affect bits outside the field and need
- adjustment via MASK within the loop. */
- break;
+ switch (code)
+ {
+ case IOR:
+ case XOR:
+ /* We've already zero-extended VAL. That is sufficient to
+ make certain that it does not affect other bits. */
+ mask = NULL;
+ break;
- default:
- gcc_unreachable ();
- }
+ case AND:
+ /* If we make certain that all of the other bits in VAL are
+ set, that will be sufficient to not affect other bits. */
+ x = gen_rtx_NOT (SImode, mask);
+ x = gen_rtx_IOR (SImode, x, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
+ mask = NULL;
+ break;
- /* Prepare to adjust the return value. */
- before = gen_reg_rtx (SImode);
- if (after)
- after = gen_reg_rtx (SImode);
- mode = SImode;
+ case NOT:
+ case PLUS:
+ case MINUS:
+ /* These will all affect bits outside the field and need
+ adjustment via MASK within the loop. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
@@ -17530,9 +18234,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -17541,11 +18247,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+	 then do the calculations in a SImode register.  */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+	 operation and then do the calculations in a SImode register.  */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
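An illustrative sketch (not from the patch) of the AND fixup in the subword fallback path above: before entering the atomic loop, every bit outside the byte/halfword field is forced to 1 so the full-word AND cannot disturb the neighboring bytes; IOR and XOR need no fixup because VAL was already zero-extended.

/* Mirrors the gen_rtx_IOR (gen_rtx_NOT (mask), val) adjustment above.  */
static unsigned int
adjust_and_value (unsigned int val, unsigned int field_mask)
{
  return ~field_mask | val;
}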
@@ -22560,7 +23277,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
fputs (DOUBLE_INT_ASM_OP, file);
else
fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
- fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+ if (WORDS_BIG_ENDIAN)
+ fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+ else
+ fprintf (file, "0x%lx\n", l & 0xffffffff);
return;
}
else
@@ -27197,26 +27917,31 @@ bool
altivec_expand_vec_perm_const (rtx operands[4])
{
struct altivec_perm_insn {
+ HOST_WIDE_INT mask;
enum insn_code impl;
unsigned char perm[16];
};
static const struct altivec_perm_insn patterns[] = {
- { CODE_FOR_altivec_vpkuhum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
- { CODE_FOR_altivec_vpkuwum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
- { CODE_FOR_altivec_vmrghb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb,
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
- { CODE_FOR_altivec_vmrghh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh,
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
- { CODE_FOR_altivec_vmrghw,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw,
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
- { CODE_FOR_altivec_vmrglb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb,
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
- { CODE_FOR_altivec_vmrglh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh,
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
- { CODE_FOR_altivec_vmrglw,
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
};
unsigned int i, j, elt, which;
@@ -27316,6 +28041,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
{
bool swapped;
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
+ continue;
+
elt = patterns[j].perm[0];
if (perm[0] == elt)
swapped = false;
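A hypothetical example (not in the patch) of a shuffle the new table entries catch: a constant word shuffle whose byte selector matches the p8_vmrgew row above ({ 0,1,2,3, 16,17,18,19, 8,9,10,11, 24,25,26,27 }) can now be emitted as a single vmrgew on power8 instead of a vperm with a mask loaded from memory.

/* Hypothetical user code, not from the patch (GCC vector extensions).  */
typedef unsigned int v4si __attribute__ ((vector_size (16)));

v4si
merge_even_words (v4si a, v4si b)
{
  return __builtin_shuffle (a, b, (v4si) { 0, 4, 2, 6 });
}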
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 76f3bf99250..021e72a80e3 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -715,6 +715,11 @@ extern unsigned char rs6000_recip_bits[];
instructions for them. Might as well be consistent with bits and bytes. */
#define WORDS_BIG_ENDIAN 1
+/* This says that for the IBM long double the larger magnitude double
+ comes first. It's really a two element double array, and arrays
+ don't index differently between little- and big-endian. */
+#define LONG_DOUBLE_LARGE_FIRST 1
+
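Illustrative only (not from the source): IBM extended double behaves like a two-element double array with the larger-magnitude half first regardless of endianness, so indexing the pair needs no byte-order test.

/* Sketch of the layout the macro describes.  */
union ibm_extended { long double ld; double dd[2]; };
/* With LONG_DOUBLE_LARGE_FIRST == 1, u.dd[0] is the high double.  */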
#define MAX_BITS_PER_WORD 64
/* Width of a word, in units (bytes). */
@@ -1114,10 +1119,10 @@ extern unsigned rs6000_pointer_size;
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
/* Alternate name for any vector register supporting logical operations, no
- matter which instruction set(s) are available. Under VSX, we allow GPRs as
- well as vector registers on 64-bit systems. We don't allow 32-bit systems,
- due to the number of registers involved, and the number of instructions to
- load/store the values.. */
+ matter which instruction set(s) are available. For 64-bit mode, we also
+   allow logical operations in the GPRs.  This is so the atomic quad word
+   builtins do not need the VSX registers for lqarx/stqcx.  It also helps with
+ __int128_t arguments that are passed in GPRs. */
#define VLOGICAL_REGNO_P(N) \
(ALTIVEC_REGNO_P (N) \
|| (TARGET_VSX && FP_REGNO_P (N)) \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 013a0e38551..7fe23bff351 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -127,6 +127,13 @@
UNSPEC_LFIWZX
UNSPEC_FCTIWUZ
UNSPEC_GRP_END_NOP
+ UNSPEC_P8V_FMRGOW
+ UNSPEC_P8V_MTVSRWZ
+ UNSPEC_P8V_RELOAD_FROM_GPR
+ UNSPEC_P8V_MTVSRD
+ UNSPEC_P8V_XXPERMDI
+ UNSPEC_P8V_RELOAD_FROM_VSX
+ UNSPEC_FUSION_GPR
])
;;
@@ -166,9 +173,14 @@
(const_int 4)))
;; Processor type -- this attribute must exactly match the processor_type
-;; enumeration in rs6000.h.
-
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan,power8"
+;; enumeration in rs6000-opts.h.
+(define_attr "cpu"
+ "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,
+ ppc750,ppc7400,ppc7450,
+ ppc403,ppc405,ppc440,ppc476,
+ ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,
+ power4,power5,power6,power7,power8,
+ rs64a,mpccore,cell,ppca2,titan"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -227,6 +239,12 @@
; extend modes for DImode
(define_mode_iterator QHSI [QI HI SI])
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
; SImode or DImode, even if DImode doesn't fit in GPRs.
(define_mode_iterator SDI [SI DI])
@@ -268,6 +286,15 @@
(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+; Iterators for 128 bit types for direct move
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
+ (V16QI "")
+ (V8HI "")
+ (V4SI "")
+ (V4SF "")
+ (V2DI "")
+ (V2DF "")])
+
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
(DF "")
@@ -284,11 +311,16 @@
(define_mode_attr f32_lr [(SF "f") (SD "wz")])
(define_mode_attr f32_lm [(SF "m") (SD "Z")])
(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")])
; Definitions for store from 32-bit fpr register
(define_mode_attr f32_sr [(SF "f") (SD "wx")])
(define_mode_attr f32_sm [(SF "m") (SD "Z")])
(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")])
+
+; Definitions for 32-bit fpr direct move
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
@@ -368,7 +400,7 @@
(define_insn "*zero_extend<mode>di2_internal1"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
"@
l<wd>z%U1%X1 %0,%1
rldicl %0,%1,0,<dbits>"
@@ -434,6 +466,29 @@
(const_int 0)))]
"")
+(define_insn "*zero_extendsidi2_lfiwzx"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm")
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
+ "@
+ lwz%U1%X1 %0,%1
+ rldicl %0,%1,0,32
+ mtvsrwz %x0,%1
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
(define_insn "extendqidi2"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -581,10 +636,33 @@
"TARGET_POWERPC64"
"")
-(define_insn ""
+(define_insn "*extendsidi2_lfiwax"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm")
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
+ "@
+ lwa%U1%X1 %0,%1
+ extsw %0,%1
+ mtvsrwa %x0,%1
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
extsw %0,%1"
@@ -598,7 +676,7 @@
(const_string "load_ext")))
(const_string "exts")])])
-(define_insn ""
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
@@ -2035,7 +2113,9 @@
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
"TARGET_CMPB && TARGET_POPCNTB"
- "prty<wd> %0,%1")
+ "prty<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "popcnt")])
(define_expand "parity<mode>2"
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
@@ -4316,7 +4396,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -4348,7 +4428,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -5104,6 +5184,41 @@
"frsqrtes %0,%1"
[(set_attr "type" "fp")])
+;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
+;; builtins.c and optabs.c that are not correct for IBM long double
+;; when little-endian.
+(define_expand "signbittf2"
+ [(set (match_dup 2)
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+ (set (match_dup 3)
+ (subreg:DI (match_dup 2) 0))
+ (set (match_dup 4)
+ (match_dup 5))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_dup 6))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+ operands[3] = gen_reg_rtx (DImode);
+ if (TARGET_POWERPC64)
+ {
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63));
+ operands[6] = gen_rtx_SUBREG (SImode, operands[4],
+ WORDS_BIG_ENDIAN ? 4 : 0);
+ }
+ else
+ {
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = gen_rtx_SUBREG (SImode, operands[3],
+ WORDS_BIG_ENDIAN ? 0 : 4);
+ operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31));
+ }
+})
+
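An illustrative C equivalent (not from the patch) of the expander above: the sign of an IBM long double is the sign of its high double, so signbit reduces to truncating to DFmode and extracting bit 63 of that double's image.

/* Sketch only; mirrors float_truncate:DF plus the 63-bit right shift.  */
static int
signbit_ibm128 (long double x)
{
  union { double d; unsigned long long u; } v;
  v.d = (double) x;           /* the high double of the pair  */
  return (int) (v.u >> 63);
}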
(define_expand "copysign<mode>3"
[(set (match_dup 3)
(abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
@@ -5553,12 +5668,15 @@
; We don't define lfiwax/lfiwzx with the normal definition, because we
; don't want to support putting SImode in FPR registers.
(define_insn "lfiwax"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWAX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
- "lfiwax %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1
+ mtvsrwa %x0,%1"
+ [(set_attr "type" "fpload,fpload,mffgpr")])
; This split must be run before register allocation because it allocates the
; memory slot that is needed to move values to/from the FPR. We don't allocate
@@ -5580,7 +5698,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, false);
else
{
@@ -5629,12 +5748,15 @@
(set_attr "type" "fpload")])
(define_insn "lfiwzx"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWZX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
- "lfiwzx %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1
+ mtvsrwz %x0,%1"
+ [(set_attr "type" "fpload,fpload,mftgpr")])
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
@@ -5651,7 +5773,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, true);
else
{
@@ -5942,7 +6065,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -6036,7 +6159,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -8285,6 +8408,18 @@
(compare:CC (match_dup 0)
(const_int 0)))]
"")
+
+;; Eqv operation.
+(define_insn "*eqv<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (not:GPR
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
+ ""
+ "eqv %0,%1,%2"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
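A hypothetical source pattern (not from the patch) that combine can now match to the single eqv instruction defined above:

/* Hypothetical user code, not from the patch.  */
long
bitwise_eqv (long a, long b)
{
  return ~(a ^ b);   /* (not (xor a b)) -> eqv  */
}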
;; Now define ways of moving data around.
@@ -8490,7 +8625,7 @@
cmp<wd>i %2,%0,0
mr. %0,%1
#"
- [(set_attr "type" "cmp,compare,cmp")
+ [(set_attr "type" "cmp,fast_compare,cmp")
(set_attr "length" "4,4,8")])
(define_split
@@ -8680,8 +8815,8 @@
}")
(define_insn "mov<mode>_hardfloat"
- [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
- (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -8694,6 +8829,10 @@
xxlxor %x0,%x0,%x0
<f32_li>
<f32_si>
+ <f32_lv>
+ <f32_sv>
+ mtvsrwz %x0,%1
+ mfvsrwz %0,%x1
mt%0 %1
mf%1 %0
nop
@@ -8732,16 +8871,20 @@
(match_test "update_address_mem (operands[0], VOIDmode)")
(const_string "fpstore_u")
(const_string "fpstore")))
+ (const_string "fpload")
+ (const_string "fpstore")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
(const_string "mtjmpr")
(const_string "mfjmpr")
(const_string "*")
(const_string "*")
(const_string "*")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
(define_insn "*mov<mode>_softfloat"
[(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
- (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
@@ -8954,8 +9097,8 @@
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*mov<mode>_hardfloat64"
- [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg")
- (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))]
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8980,7 +9123,9 @@
#
#
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9038,8 +9183,10 @@
(const_string "*")
(const_string "*")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
(define_insn "*mov<mode>_softfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
@@ -9154,8 +9301,8 @@
"&& reload_completed"
[(pc)]
{
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word),
operands[1]);
emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word),
@@ -9384,8 +9531,8 @@
&& TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -9419,6 +9566,216 @@
})
+;; Power8 merge instructions to allow direct moves to/from floating point
+;; registers in 32-bit mode.  We use TF mode to get two registers to move the
+;; individual 32-bit parts across.  Subreg doesn't work well on the TF value,
+;; since it is allocated in reload and not all of the flow information is set
+;; up for it.  We have two patterns to do the two moves between gprs and fprs.
+;; There isn't a dependency between the two, but we could potentially schedule
+;; other instructions between the two instructions.  TFmode is currently
+;; limited to traditional FPR registers.  If/when this is changed, we will
+;; need to revisit %L to make sure it works with VSX registers, or add an
+;; %x version of %L.
+
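An illustrative C model (assumed, not from the patch) of what the two mtvsrwz
moves plus the fmrgow merge accomplish: the 64-bit image travels as two 32-bit
halves and is then reassembled.

    #include <stdint.h>

    /* Reassemble a 64-bit image from its two 32-bit GPR halves.  */
    uint64_t
    merge_words (uint32_t hi, uint32_t lo)
    {
      return ((uint64_t) hi << 32) | lo;
    }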
+(define_insn "p8_fmrgow_<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
+ UNSPEC_P8V_FMRGOW))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "fmrgow %0,%1,%L1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_mtvsrwz_1"
+ [(set (match_operand:TF 0 "register_operand" "=d")
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrwz_2"
+ [(set (match_operand:TF 0 "register_operand" "+d")
+ (unspec:TF [(match_dup 0)
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
+
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move 128-bit values from GPRs to VSX registers in 64-bit mode
+(define_insn "p8_mtvsrd_1"
+ [(set (match_operand:TF 0 "register_operand" "=ws")
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_2"
+ [(set (match_operand:TF 0 "register_operand" "+ws")
+ (unspec:TF [(match_dup 0)
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_xxpermdi_<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+ UNSPEC_P8V_XXPERMDI))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "xxpermdi %x0,%1,%L1,0"
+ [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+  DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a VSX register from a GPR.  Because the scalar floating
+;; point type is stored internally as double precision in the VSX registers,
+;; we have to convert the value from the single-precision vector format.
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+ [(set (match_operand:SF 0 "register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_move_insn (op0_di, op2);
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "two")])
+
+;; Move 128-bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move of the upper half, an xxpermdi to get the bottom 64-bit
+;; value into the scratch register, and then a 64-bit move of that.
+(define_insn "p8_mfvsrd_3_<mode>"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+  DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register.  Because the scalar floating
+;; point type is stored internally as double precision, we have to convert
+;; the value to the single-precision vector format first.
+
+(define_insn_and_split "reload_gpr_from_vsxsf"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
+
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+(define_insn "p8_mfvsrd_4_disf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+
;; Next come the multi-word integer load and store and the load and store
;; multiple insns.
@@ -9467,7 +9824,8 @@
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(match_operand:DI 1 "const_int_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 1))]
"
@@ -9485,13 +9843,14 @@
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
(match_operand:DIFD 1 "input_operand" ""))]
"reload_completed && !TARGET_POWERPC64
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
(define_insn "*movdi_internal64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg")
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
"TARGET_POWERPC64
&& (gpc_reg_operand (operands[0], DImode)
|| gpc_reg_operand (operands[1], DImode))"
@@ -9513,7 +9872,9 @@
nop
xxlxor %x0,%x0,%x0
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9562,8 +9923,10 @@
(const_string "*")
(const_string "vecsimple")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")])
;; Generate all one-bits and clear left or right.
;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber.
@@ -9652,19 +10015,23 @@
(const_string "conditional")))])
(define_insn "*mov<mode>_ppc64"
- [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r")
- (match_operand:TI2 1 "input_operand" "r,Y,r"))]
- "(TARGET_POWERPC64
- && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
+ (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode)))"
- "#"
- [(set_attr "type" "store,load,*")])
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "store,load,*,*")
+ (set_attr "length" "8")])
(define_split
- [(set (match_operand:TI2 0 "gpc_reg_operand" "")
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
(match_operand:TI2 1 "const_double_operand" ""))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"
@@ -9691,7 +10058,9 @@
[(set (match_operand:TI2 0 "nonimmediate_operand" "")
(match_operand:TI2 1 "input_operand" ""))]
"reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
@@ -12554,8 +12923,8 @@
(match_dup 13)]
{
REAL_VALUE_TYPE rv;
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word);
operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word);
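Illustrative only, not part of the patch: LONG_DOUBLE_LARGE_FIRST selects
which DFmode half of a TFmode value lives at byte offset 0, and
GET_MODE_SIZE (DFmode) is 8.

    /* Byte offsets of the two DFmode halves of a TFmode value.  */
    struct tf_layout { int hi_off, lo_off; };

    struct tf_layout
    tf_halves (int large_first)
    {
      struct tf_layout l;
      l.hi_off = large_first ? 0 : 8;
      l.lo_off = large_first ? 8 : 0;
      return l;
    }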
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md
index cec2b430b82..bf10a5dc180 100644
--- a/gcc/config/rs6000/spe.md
+++ b/gcc/config/rs6000/spe.md
@@ -2604,8 +2604,8 @@
&& TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -2627,8 +2627,8 @@
&& TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 252e2690a98..8616b3eca5f 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -18,14 +18,23 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+ (HI "lharx")
+ (SI "lwarx")
+ (DI "ldarx")
+ (TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+ (HI "sthcx.")
+ (SI "stwcx.")
+ (DI "stdcx.")
+ (TI "stqcx.")])
(define_code_iterator FETCHOP [plus minus ior xor and])
(define_code_attr fetchop_name
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
(define_code_attr fetchop_pred
- [(plus "add_operand") (minus "gpc_reg_operand")
+ [(plus "add_operand") (minus "int_reg_operand")
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
(define_expand "mem_thread_fence"
@@ -129,16 +138,7 @@
case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
- if (GET_MODE (operands[0]) == QImode)
- emit_insn (gen_loadsync_qi (operands[0]));
- else if (GET_MODE (operands[0]) == HImode)
- emit_insn (gen_loadsync_hi (operands[0]));
- else if (GET_MODE (operands[0]) == SImode)
- emit_insn (gen_loadsync_si (operands[0]));
- else if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_loadsync_di (operands[0]));
- else
- gcc_unreachable ();
+ emit_insn (gen_loadsync_<mode> (operands[0]));
break;
default:
gcc_unreachable ();
@@ -170,35 +170,109 @@
DONE;
})
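From the C side, an assumed usage example (not part of the patch): an
acquire-ordered load is one of the cases that now reaches the single
gen_loadsync_<mode> call above.

    #include <stdatomic.h>

    /* An acquire load; the expander emits the plain load followed by the
       loadsync fence sequence.  */
    int
    load_acquire (atomic_int *p)
    {
      return atomic_load_explicit (p, memory_order_acquire);
    }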
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+ (HI "TARGET_SYNC_HI_QI")
+ SI
+ (DI "TARGET_POWERPC64")])
+
+;; Types that we should provide atomic instructions for.
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
(define_insn "load_locked<mode>"
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
""
"<larx> %0,%y1"
[(set_attr "type" "load_l")])
+(define_insn "load_locked<QHI:mode>_si"
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_HI_QI"
+ "<QHI:larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs
+(define_expand "load_lockedti"
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcx. instructions. Normal optimizations will eliminate this
+ extra copy. */
+ rtx pti = gen_reg_rtx (PTImode);
+ emit_insn (gen_load_lockedpti (pti, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ DONE;
+})
+
+(define_insn "load_lockedpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec_volatile:PTI
+ [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])
+ && quad_int_reg_operand (operands[0], PTImode)"
+ "lqarx %0,%y1"
+ [(set_attr "type" "load_l")])
+
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
""
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+(define_expand "store_conditionalti"
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
+ rtx pti_op2 = gen_reg_rtx (PTImode);
+
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcx. instructions. Normal optimizations will eliminate this
+ extra copy. */
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+ emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ DONE;
+})
+
+(define_insn "store_conditionalpti"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+ "stqcx. %2,%y1"
+ [(set_attr "type" "store_c")])
+
(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
- (match_operand:INT1 2 "memory_operand" "") ;; memory
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; model succ
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
@@ -209,9 +283,9 @@
})
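An assumed user-level example that exercises this expander for TImode when
TARGET_SYNC_TI holds; it can now lower to a lqarx/stqcx. retry loop.

    #include <stdbool.h>

    bool
    cas_128 (__int128 *p, __int128 *expected, __int128 desired)
    {
      return __atomic_compare_exchange_n (p, expected, desired, false,
                                          __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
    }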
(define_expand "atomic_exchange<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -220,9 +294,9 @@
})
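Likewise, an assumed exchange example for the expander above:

    int
    swap_int (int *p, int v)
    {
      return __atomic_exchange_n (p, v, __ATOMIC_ACQ_REL);
    }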
(define_expand "atomic_<fetchop_name><mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 0)
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 0)
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -232,8 +306,8 @@
})
(define_expand "atomic_nand<mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -243,10 +317,10 @@
})
(define_expand "atomic_fetch_<fetchop_name><mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -256,9 +330,9 @@
})
(define_expand "atomic_fetch_nand<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -268,10 +342,10 @@
})
(define_expand "atomic_<fetchop_name>_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -281,9 +355,9 @@
})
(define_expand "atomic_nand_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux
index 017a293cde3..62a5b941389 100644
--- a/gcc/config/rs6000/t-linux
+++ b/gcc/config/rs6000/t-linux
@@ -2,7 +2,7 @@
# or soft-float.
ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float)))
ifneq (,$(findstring spe,$(target)))
-MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1)
+MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1)
else
MULTIARCH_DIRNAME = powerpc-linux-gnu
endif
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index 8b8b3427454..6cfebdeebdc 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -126,7 +126,9 @@
(match_operand:VEC_L 1 "input_operand" ""))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
@@ -730,9 +732,10 @@
"")
(define_expand "and<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -746,8 +749,8 @@
(define_expand "nor<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -760,6 +763,47 @@
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
+;; Power8 vector logical instructions.
+(define_expand "eqv<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (not:VEC_L
+ (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "")
+ (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Rewrite nand into canonical form
+(define_expand "nand<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; The canonical form is to have the negated element first, so we need to
+;; reverse the arguments.
+(define_expand "orc<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (match_operand:VEC_L 2 "register_operand" "")))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Vector count leading zeros
+(define_expand "clz<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+;; Vector population count
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+
;; Same size conversions
(define_expand "float<VEC_int><mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4adf6e5ac55..b87da826a95 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -36,6 +36,10 @@
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+;; Like VSX_L, but do not support TImode for logical instructions in
+;; 32-bit mode.
+(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF])
+
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -191,6 +195,8 @@
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVSPDPN
+ UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
UNSPEC_VSX_CVUXWDP
UNSPEC_VSX_CVSXDSP
@@ -207,112 +213,31 @@
;; VSX moves
(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
"VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
- switch (which_alternative)
- {
- case 0:
- case 3:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stx<VSm>x %x1,%y0";
-
- case 1:
- case 4:
- gcc_assert (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
- return "lx<VSm>x %x0,%y1";
-
- case 2:
- case 5:
- return "xxlor %x0,%x1,%x1";
-
- case 6:
- case 7:
- case 8:
- case 11:
- return "#";
-
- case 9:
- case 10:
- return "xxlxor %x0,%x0,%x0";
-
- case 12:
- return output_vec_const_move (operands);
-
- case 13:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stvx %1,%y0";
-
- case 14:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "lvx %0,%y1";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions. However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r")
- (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
- switch (which_alternative)
- {
- case 0:
- return "stxvd2x %x1,%y0";
-
- case 1:
- return "lxvd2x %x0,%y1";
-
- case 2:
- return "xxlor %x0,%x1,%x1";
-
- case 3:
- return "xxlxor %x0,%x0,%x0";
-
- case 4:
- return output_vec_const_move (operands);
-
- case 5:
- return "stvx %1,%y0";
-
- case 6:
- return "lvx %0,%y1";
-
- case 7:
- case 8:
- case 9:
- case 10:
- return "#";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*")
- (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
(define_insn "*vsx_movti_32bit"
[(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
@@ -1003,6 +928,40 @@
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; ISA 2.07 xscvdpspn/xscvspdpn, which do not raise errors on signalling NaNs
+(define_insn "vsx_xscvdpspn"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Used by direct move to move an SFmode value from a GPR to a VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1088,70 +1047,368 @@
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Logical operations
-;; Do not support TImode logical instructions on 32-bit at present, because the
-;; compiler will see that we have a TImode and when it wanted DImode, and
-;; convert the DImode to TImode, store it on the stack, and load it in a VSX
-;; register.
-(define_insn "*vsx_and<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+;; Logical operations.  Do not support TImode logical instructions on 32-bit
+;; at present, because the compiler will see that we have a TImode when it
+;; wanted DImode, convert the DImode to TImode, store it on the stack, and
+;; load it into a VSX register, or generate extra logical instructions in GPR
+;; registers.
+
+;; When we are splitting the operations to GPRs, we use three alternatives, two
+;; where the first/second inputs and output are in the same register, and the
+;; third where the output specifies an early clobber so that we don't have to
+;; worry about overlapping registers.
+
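An illustrative C sketch of what the GPR alternatives split into, treating
the 128-bit value as two 64-bit doublewords (the subregs at byte offsets 0
and 8):

    #include <stdint.h>

    typedef struct { uint64_t hi, lo; } u128;

    /* A 128-bit AND in GPRs is two independent 64-bit ANDs.  */
    u128
    and128 (u128 a, u128 b)
    {
      u128 r = { a.hi & b.hi, a.lo & b.lo };
      return r;
    }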
+(define_insn "*vsx_and<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))
+ (clobber (match_scratch:CC 3 "X"))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxland %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_ior<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_and<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))
+ (clobber (match_scratch:CC 3 "X,X,X,X"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxland %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6)))
+ (clobber (match_dup 3))])
+ (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9)))
+ (clobber (match_dup 3))])]
+{
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_ior<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_ior<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (ior:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
-(define_insn "*vsx_xor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (xor:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+ if (operands[5] == constm1_rtx)
+ emit_move_insn (operands[3], constm1_rtx);
+
+ else if (operands[5] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[3], operands[4], operands[5]));
+
+ if (operands[8] == constm1_rtx)
+ emit_move_insn (operands[6], constm1_rtx);
+
+ else if (operands[8] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[6], operands[7]))
+ emit_move_insn (operands[6], operands[7]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[6], operands[7], operands[8]));
+ DONE;
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_xor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64"
"xxlxor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_one_cmpl<mode>2"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_xor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (xor:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlxor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_one_cmpl<mode>2_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x1"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r")
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x1
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 2) (not:DI (match_dup 3)))
+ (set (match_dup 4) (not:DI (match_dup 5)))]
+{
+ operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two")
+ (set_attr "length" "4,8,8")])
-(define_insn "*vsx_nor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (ior:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn "*vsx_nor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa"))
+ (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_andc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_andc<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+(define_insn_and_split "*vsx_andc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
(and:VSX_L
(not:VSX_L
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
- "xxlandc %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlandc %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Power8 vector logical instructions.
+(define_insn "*vsx_eqv<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxleqv %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_eqv<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (not:VSX_L
+ (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxleqv %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5))))
+ (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Rewrite nand into canonical form
+(define_insn "*vsx_nand<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnand %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nand<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "register_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnand %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Rewrite or complement into canonical form, by reversing the arguments
+(define_insn "*vsx_orc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlorc %x0,%x2,%x1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_orc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlorc %x0,%x2,%x1
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
;; Permute operations
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 30c34901f8d..358345a4437 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -2017,14 +2017,18 @@ s390_decompose_address (rtx addr, struct s390_address *out)
Thus we don't check the displacement for validity here. If after
elimination the displacement turns out to be invalid after all,
this is fixed up by reload in any case. */
- if (base != arg_pointer_rtx
- && indx != arg_pointer_rtx
- && base != return_address_pointer_rtx
- && indx != return_address_pointer_rtx
- && base != frame_pointer_rtx
- && indx != frame_pointer_rtx
- && base != virtual_stack_vars_rtx
- && indx != virtual_stack_vars_rtx)
+ /* LRA always keeps displacements up to date, and we need to know
+ that the displacement is valid throughout LRA, not only at the
+ final elimination. */
+ if (lra_in_progress
+ || (base != arg_pointer_rtx
+ && indx != arg_pointer_rtx
+ && base != return_address_pointer_rtx
+ && indx != return_address_pointer_rtx
+ && base != frame_pointer_rtx
+ && indx != frame_pointer_rtx
+ && base != virtual_stack_vars_rtx
+ && indx != virtual_stack_vars_rtx))
if (!DISP_IN_RANGE (offset))
return false;
}
@@ -3189,7 +3193,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
/* We need a scratch register when loading a PLUS expression which
is not a legitimate operand of the LOAD ADDRESS instruction. */
- if (in_p && s390_plus_operand (x, mode))
+ /* LRA copes well with transformations of a PLUS expression, so we
+ don't need to prompt it with a secondary reload in this case. */
+ if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
sri->icode = (TARGET_64BIT ?
CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
@@ -7868,6 +7874,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}
+/* Return true if we use LRA instead of the reload pass.  */
+static bool
+s390_lra_p (void)
+{
+ return s390_lra_flag;
+}
+
/* Return true if register FROM can be eliminated via register TO. */
static bool
@@ -11105,6 +11118,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P s390_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 43e24d5d112..b0e530f0ed4 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -221,7 +221,7 @@ enum processor_flags
/* Alignment on even addresses for LARL instruction. */
#define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
-#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
/* Alignment is not required by the hardware. */
#define STRICT_ALIGNMENT 0
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index cad4f5f579a..0141b9813ef 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -277,7 +277,8 @@
(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12"
(const (symbol_ref "s390_tune_attr")))
-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12"
+(define_attr "cpu_facility"
+ "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12"
(const_string "standard"))
(define_attr "enabled" ""
@@ -304,6 +305,10 @@
(match_test "TARGET_DFP"))
(const_int 1)
+ (and (eq_attr "cpu_facility" "cpu_zarch")
+ (match_test "TARGET_CPU_ZARCH"))
+ (const_int 1)
+
(and (eq_attr "cpu_facility" "z10")
(match_test "TARGET_Z10"))
(const_int 1)
@@ -2690,7 +2695,7 @@
"(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -2899,7 +2904,7 @@
"(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -3075,7 +3080,7 @@
"(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (reg:CCU CC_REGNUM)
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index b326441173c..ba38e6e14ed 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -149,3 +149,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1)
Set the branch costs for conditional branch instructions. Reasonable
values are small, non-negative integers. The default branch cost is
1.
+
+mlra
+Target Report Var(s390_lra_flag) Init(1) Save
+Use LRA instead of reload.