author    Aldy Hernandez <aldyh@redhat.com>    2013-06-28 09:57:43 -0700
committer Aldy Hernandez <aldyh@redhat.com>    2013-06-28 09:57:43 -0700
commit    7fb75753fa7e7c54af3b5e0aea65d8051feac55d (patch)
tree      568d89cbf5521cbb882c33a3a42fb332ff2e49b8 /gcc/config
parent    db2127098137dea6c246041e0d763a57a174fa3c (diff)
parent    2814409c2f46b5f71706f08358f395dddc9d8a81 (diff)
download  gcc-7fb75753fa7e7c54af3b5e0aea65d8051feac55d.tar.gz
Merge remote-tracking branch 'origin/gomp-4_0-branch' into cilk-in-gomp
Conflicts:
    gcc/Makefile.in
    gcc/c-family/c-common.h
    gcc/c/c-parser.c
    gcc/cp/Make-lang.in
    gcc/cp/cp-tree.h
    gcc/gimple.h
    gcc/omp-low.c
    gcc/testsuite/g++.dg/cilk-plus/cilk-plus.exp
    gcc/testsuite/gcc.dg/cilk-plus/cilk-plus.exp
Diffstat (limited to 'gcc/config')
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 10
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 22
-rw-r--r--  gcc/config/aarch64/aarch64-simd-builtins.def | 3
-rw-r--r--  gcc/config/aarch64/aarch64-simd.md | 210
-rw-r--r--  gcc/config/aarch64/aarch64.c | 476
-rw-r--r--  gcc/config/aarch64/aarch64.md | 95
-rw-r--r--  gcc/config/aarch64/arm_neon.h | 16
-rw-r--r--  gcc/config/aarch64/constraints.md | 10
-rw-r--r--  gcc/config/aarch64/iterators.md | 3
-rw-r--r--  gcc/config/aarch64/predicates.md | 18
-rw-r--r--  gcc/config/alpha/alpha.c | 21
-rw-r--r--  gcc/config/alpha/alpha.md | 1
-rw-r--r--  gcc/config/arm/arm-fixed.md | 38
-rw-r--r--  gcc/config/arm/arm-generic.md | 15
-rw-r--r--  gcc/config/arm/arm-ldmstm.ml | 7
-rw-r--r--  gcc/config/arm/arm-protos.h | 10
-rw-r--r--  gcc/config/arm/arm.c | 287
-rw-r--r--  gcc/config/arm/arm.h | 5
-rw-r--r--  gcc/config/arm/arm.md | 733
-rw-r--r--  gcc/config/arm/arm.opt | 4
-rw-r--r--  gcc/config/arm/arm1020e.md | 12
-rw-r--r--  gcc/config/arm/arm1026ejs.md | 12
-rw-r--r--  gcc/config/arm/arm1136jfs.md | 15
-rw-r--r--  gcc/config/arm/arm926ejs.md | 12
-rw-r--r--  gcc/config/arm/constraints.md | 21
-rw-r--r--  gcc/config/arm/cortex-a15.md | 14
-rw-r--r--  gcc/config/arm/cortex-a5.md | 3
-rw-r--r--  gcc/config/arm/cortex-a53.md | 7
-rw-r--r--  gcc/config/arm/cortex-a7.md | 7
-rw-r--r--  gcc/config/arm/cortex-a8.md | 10
-rw-r--r--  gcc/config/arm/cortex-a9.md | 10
-rw-r--r--  gcc/config/arm/cortex-m4.md | 5
-rw-r--r--  gcc/config/arm/cortex-r4.md | 16
-rw-r--r--  gcc/config/arm/fa526.md | 4
-rw-r--r--  gcc/config/arm/fa606te.md | 8
-rw-r--r--  gcc/config/arm/fa626te.md | 8
-rw-r--r--  gcc/config/arm/fa726te.md | 2
-rw-r--r--  gcc/config/arm/fmp626.md | 8
-rw-r--r--  gcc/config/arm/iterators.md | 8
-rw-r--r--  gcc/config/arm/ldmstm.md | 72
-rw-r--r--  gcc/config/arm/marvell-pj4.md | 8
-rw-r--r--  gcc/config/arm/neon.md | 60
-rw-r--r--  gcc/config/arm/predicates.md | 49
-rw-r--r--  gcc/config/arm/sync.md | 30
-rw-r--r--  gcc/config/arm/t-arm | 4
-rw-r--r--  gcc/config/arm/thumb2.md | 77
-rw-r--r--  gcc/config/arm/vfp.md | 115
-rw-r--r--  gcc/config/c6x/c6x.h | 2
-rw-r--r--  gcc/config/i386/ammintrin.h | 15
-rw-r--r--  gcc/config/i386/avx2intrin.h | 16
-rw-r--r--  gcc/config/i386/avxintrin.h | 16
-rw-r--r--  gcc/config/i386/bmi2intrin.h | 15
-rw-r--r--  gcc/config/i386/bmiintrin.h | 27
-rw-r--r--  gcc/config/i386/driver-i386.c | 10
-rw-r--r--  gcc/config/i386/emmintrin.h | 15
-rw-r--r--  gcc/config/i386/f16cintrin.h | 16
-rw-r--r--  gcc/config/i386/fma4intrin.h | 15
-rw-r--r--  gcc/config/i386/fmaintrin.h | 11
-rw-r--r--  gcc/config/i386/fxsrintrin.h | 12
-rw-r--r--  gcc/config/i386/gnu.h | 8
-rw-r--r--  gcc/config/i386/i386-c.c | 20
-rw-r--r--  gcc/config/i386/i386-protos.h | 4
-rw-r--r--  gcc/config/i386/i386.c | 588
-rw-r--r--  gcc/config/i386/i386.h | 19
-rw-r--r--  gcc/config/i386/i386.md | 39
-rw-r--r--  gcc/config/i386/ia32intrin.h | 26
-rw-r--r--  gcc/config/i386/immintrin.h | 69
-rw-r--r--  gcc/config/i386/lwpintrin.h | 11
-rw-r--r--  gcc/config/i386/lzcntintrin.h | 14
-rw-r--r--  gcc/config/i386/mm3dnow.h | 14
-rw-r--r--  gcc/config/i386/mmintrin.h | 36
-rw-r--r--  gcc/config/i386/nmmintrin.h | 4
-rw-r--r--  gcc/config/i386/pmmintrin.h | 15
-rw-r--r--  gcc/config/i386/popcntintrin.h | 15
-rw-r--r--  gcc/config/i386/prfchwintrin.h | 15
-rw-r--r--  gcc/config/i386/rdseedintrin.h | 14
-rw-r--r--  gcc/config/i386/rtmintrin.h | 15
-rw-r--r--  gcc/config/i386/slm.md | 758
-rw-r--r--  gcc/config/i386/smmintrin.h | 50
-rw-r--r--  gcc/config/i386/t-cygming | 2
-rw-r--r--  gcc/config/i386/t-interix | 2
-rw-r--r--  gcc/config/i386/tbmintrin.h | 16
-rw-r--r--  gcc/config/i386/tmmintrin.h | 15
-rw-r--r--  gcc/config/i386/winnt.c | 42
-rw-r--r--  gcc/config/i386/wmmintrin.h | 30
-rw-r--r--  gcc/config/i386/x86intrin.h | 44
-rw-r--r--  gcc/config/i386/xmmintrin.h | 18
-rw-r--r--  gcc/config/i386/xopintrin.h | 15
-rw-r--r--  gcc/config/i386/xsaveintrin.h | 11
-rw-r--r--  gcc/config/i386/xsaveoptintrin.h | 11
-rw-r--r--  gcc/config/i386/xtestintrin.h | 15
-rw-r--r--  gcc/config/ia64/ia64.c | 77
-rw-r--r--  gcc/config/ia64/t-ia64 | 3
-rw-r--r--  gcc/config/mips/constraints.md | 6
-rw-r--r--  gcc/config/mips/mips-cpus.def | 5
-rw-r--r--  gcc/config/mips/mips-dsp.md | 9
-rw-r--r--  gcc/config/mips/mips-ps-3d.md | 4
-rw-r--r--  gcc/config/mips/mips-tables.opt | 294
-rw-r--r--  gcc/config/mips/mips.c | 85
-rw-r--r--  gcc/config/mips/mips.h | 122
-rw-r--r--  gcc/config/mips/mips.md | 394
-rw-r--r--  gcc/config/mips/mips.opt | 4
-rw-r--r--  gcc/config/mips/mti-linux.h | 2
-rw-r--r--  gcc/config/mips/n32-elf.h | 35
-rw-r--r--  gcc/config/mips/sde.h | 17
-rw-r--r--  gcc/config/mips/t-mti-elf | 33
-rw-r--r--  gcc/config/mips/t-mti-linux | 19
-rw-r--r--  gcc/config/mmix/mmix.c | 2
-rw-r--r--  gcc/config/mmix/mmix.h | 2
-rw-r--r--  gcc/config/rl78/rl78.c | 19
-rw-r--r--  gcc/config/rl78/rl78.md | 70
-rw-r--r--  gcc/config/rs6000/altivec.h | 36
-rw-r--r--  gcc/config/rs6000/altivec.md | 512
-rw-r--r--  gcc/config/rs6000/constraints.md | 19
-rw-r--r--  gcc/config/rs6000/crypto.md | 101
-rw-r--r--  gcc/config/rs6000/driver-rs6000.c | 2
-rw-r--r--  gcc/config/rs6000/linux64.h | 7
-rw-r--r--  gcc/config/rs6000/power8.md | 373
-rw-r--r--  gcc/config/rs6000/predicates.md | 147
-rw-r--r--  gcc/config/rs6000/rs6000-builtin.def | 239
-rw-r--r--  gcc/config/rs6000/rs6000-c.c | 611
-rw-r--r--  gcc/config/rs6000/rs6000-cpus.def | 20
-rw-r--r--  gcc/config/rs6000/rs6000-opts.h | 24
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h | 4
-rw-r--r--  gcc/config/rs6000/rs6000.c | 1553
-rw-r--r--  gcc/config/rs6000/rs6000.h | 141
-rw-r--r--  gcc/config/rs6000/rs6000.md | 479
-rw-r--r--  gcc/config/rs6000/rs6000.opt | 26
-rw-r--r--  gcc/config/rs6000/spe.md | 8
-rw-r--r--  gcc/config/rs6000/sync.md | 166
-rw-r--r--  gcc/config/rs6000/t-linux | 2
-rw-r--r--  gcc/config/rs6000/t-rs6000 | 2
-rw-r--r--  gcc/config/rs6000/vector.md | 70
-rw-r--r--  gcc/config/rs6000/vsx.md | 543
-rw-r--r--  gcc/config/rx/rx-opts.h | 3
-rw-r--r--  gcc/config/rx/rx.c | 46
-rw-r--r--  gcc/config/rx/rx.h | 20
-rw-r--r--  gcc/config/rx/rx.md | 20
-rw-r--r--  gcc/config/rx/rx.opt | 7
-rw-r--r--  gcc/config/rx/t-rx | 2
-rw-r--r--  gcc/config/s390/htmintrin.h | 57
-rw-r--r--  gcc/config/s390/htmxlintrin.h | 182
-rw-r--r--  gcc/config/s390/predicates.md | 12
-rw-r--r--  gcc/config/s390/s390-modes.def | 9
-rw-r--r--  gcc/config/s390/s390-protos.h | 3
-rw-r--r--  gcc/config/s390/s390.c | 636
-rw-r--r--  gcc/config/s390/s390.h | 39
-rw-r--r--  gcc/config/s390/s390.md | 294
-rw-r--r--  gcc/config/s390/s390.opt | 8
-rw-r--r--  gcc/config/s390/s390intrin.h | 33
-rw-r--r--  gcc/config/sh/predicates.md | 42
-rw-r--r--  gcc/config/sh/sh.c | 6
-rw-r--r--  gcc/config/sh/sh.md | 45
-rw-r--r--  gcc/config/sol2.c | 53
-rw-r--r--  gcc/config/sparc/sparc.c | 8
-rw-r--r--  gcc/config/sparc/sparc.md | 44
-rw-r--r--  gcc/config/sparc/sparc.opt | 4
-rw-r--r--  gcc/config/t-sol2 | 2
158 files changed, 9750 insertions, 2818 deletions
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 4fdfe247a21..2a0e5fdc391 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -1245,6 +1245,16 @@ aarch64_builtin_vectorized_function (tree fndecl, tree type_out, tree type_in)
return AARCH64_FIND_FRINT_VARIANT (sqrt);
#undef AARCH64_CHECK_BUILTIN_MODE
#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
+ (out_mode == SImode && out_n == C \
+ && in_mode == N##Imode && in_n == C)
+ case BUILT_IN_CLZ:
+ {
+ if (AARCH64_CHECK_BUILTIN_MODE (4, S))
+ return aarch64_builtin_decls[AARCH64_SIMD_BUILTIN_clzv4si];
+ return NULL_TREE;
+ }
+#undef AARCH64_CHECK_BUILTIN_MODE
+#define AARCH64_CHECK_BUILTIN_MODE(C, N) \
(out_mode == N##Imode && out_n == C \
&& in_mode == N##Fmode && in_n == C)
case BUILT_IN_LFLOOR:
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index 001842e43b4..e5ae556736c 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -68,6 +68,13 @@ enum aarch64_symbol_context
Each of of these represents a thread-local symbol, and corresponds to the
thread local storage relocation operator for the symbol being referred to.
+ SYMBOL_TINY_ABSOLUTE
+
+ Generate symbol accesses as a PC relative address using a single
+ instruction. To compute the address of symbol foo, we generate:
+
+ ADR x0, foo
+
SYMBOL_FORCE_TO_MEM : Global variables are addressed using
constant pool. All variable addresses are spilled into constant
pools. The constant pools themselves are addressed using PC
@@ -81,6 +88,7 @@ enum aarch64_symbol_type
SYMBOL_SMALL_TLSDESC,
SYMBOL_SMALL_GOTTPREL,
SYMBOL_SMALL_TPREL,
+ SYMBOL_TINY_ABSOLUTE,
SYMBOL_FORCE_TO_MEM
};
@@ -136,6 +144,8 @@ struct tune_params
HOST_WIDE_INT aarch64_initial_elimination_offset (unsigned, unsigned);
bool aarch64_bitmask_imm (HOST_WIDE_INT val, enum machine_mode);
+enum aarch64_symbol_type
+aarch64_classify_symbolic_expression (rtx, enum aarch64_symbol_context);
bool aarch64_constant_address_p (rtx);
bool aarch64_float_const_zero_rtx_p (rtx);
bool aarch64_function_arg_regno_p (unsigned);
@@ -146,6 +156,10 @@ bool aarch64_is_long_call_p (rtx);
bool aarch64_label_mentioned_p (rtx);
bool aarch64_legitimate_pic_operand_p (rtx);
bool aarch64_move_imm (HOST_WIDE_INT, enum machine_mode);
+bool aarch64_mov_operand_p (rtx, enum aarch64_symbol_context,
+ enum machine_mode);
+char *aarch64_output_scalar_simd_mov_immediate (rtx, enum machine_mode);
+char *aarch64_output_simd_mov_immediate (rtx, enum machine_mode, unsigned);
bool aarch64_pad_arg_upward (enum machine_mode, const_tree);
bool aarch64_pad_reg_upward (enum machine_mode, const_tree, bool);
bool aarch64_regno_ok_for_base_p (int, bool);
@@ -154,9 +168,9 @@ bool aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode);
bool aarch64_simd_imm_zero_p (rtx, enum machine_mode);
bool aarch64_simd_scalar_immediate_valid_for_move (rtx, enum machine_mode);
bool aarch64_simd_shift_imm_p (rtx, enum machine_mode, bool);
+bool aarch64_simd_valid_immediate (rtx, enum machine_mode, bool,
+ struct simd_immediate_info *);
bool aarch64_symbolic_address_p (rtx);
-bool aarch64_symbolic_constant_p (rtx, enum aarch64_symbol_context,
- enum aarch64_symbol_type *);
bool aarch64_uimm12_shift (HOST_WIDE_INT);
const char *aarch64_output_casesi (rtx *);
enum aarch64_symbol_type aarch64_classify_symbol (rtx,
@@ -219,6 +233,8 @@ void aarch64_split_128bit_move (rtx, rtx);
bool aarch64_split_128bit_move_p (rtx, rtx);
+void aarch64_split_simd_combine (rtx, rtx, rtx);
+
void aarch64_split_simd_move (rtx, rtx);
/* Check for a legitimate floating point constant for FMOV. */
@@ -254,6 +270,4 @@ extern void aarch64_split_combinev16qi (rtx operands[3]);
extern void aarch64_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel);
extern bool
aarch64_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel);
-
-char* aarch64_output_simd_mov_immediate (rtx *, enum machine_mode, unsigned);
#endif /* GCC_AARCH64_PROTOS_H */
diff --git a/gcc/config/aarch64/aarch64-simd-builtins.def b/gcc/config/aarch64/aarch64-simd-builtins.def
index e4201732bcd..4d9b966d0ac 100644
--- a/gcc/config/aarch64/aarch64-simd-builtins.def
+++ b/gcc/config/aarch64/aarch64-simd-builtins.def
@@ -49,6 +49,7 @@
BUILTIN_VDQF (UNOP, sqrt, 2)
BUILTIN_VD_BHSI (BINOP, addp, 0)
VAR1 (UNOP, addp, 0, di)
+ VAR1 (UNOP, clz, 2, v4si)
BUILTIN_VD_RE (REINTERP, reinterpretdi, 0)
BUILTIN_VDC (REINTERP, reinterpretv8qi, 0)
@@ -63,7 +64,7 @@
BUILTIN_VQ (REINTERP, reinterpretv2df, 0)
BUILTIN_VDQ_I (BINOP, dup_lane, 0)
- BUILTIN_SDQ_I (BINOP, dup_lane, 0)
+ BUILTIN_VDQ_I (BINOP, dup_lane_scalar, 0)
/* Implemented by aarch64_<sur>q<r>shl<mode>. */
BUILTIN_VSDQ_I (BINOP, sqshl, 0)
BUILTIN_VSDQ_I (BINOP, uqshl, 0)
diff --git a/gcc/config/aarch64/aarch64-simd.md b/gcc/config/aarch64/aarch64-simd.md
index 9069a73c46c..08826b5dd9f 100644
--- a/gcc/config/aarch64/aarch64-simd.md
+++ b/gcc/config/aarch64/aarch64-simd.md
@@ -357,29 +357,18 @@
(set_attr "simd_mode" "<MODE>")]
)
-(define_insn "aarch64_dup_lane<mode>"
- [(set (match_operand:ALLX 0 "register_operand" "=w")
+(define_insn "aarch64_dup_lane_scalar<mode>"
+ [(set (match_operand:<VEL> 0 "register_operand" "=w, r")
(vec_select:<VEL>
- (match_operand:<VCON> 1 "register_operand" "w")
- (parallel [(match_operand:SI 2 "immediate_operand" "i")])
+ (match_operand:VDQ 1 "register_operand" "w, w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i, i")])
))]
"TARGET_SIMD"
- "dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]"
- [(set_attr "simd_type" "simd_dup")
- (set_attr "simd_mode" "<MODE>")]
-)
-
-(define_insn "aarch64_dup_lanedi"
- [(set (match_operand:DI 0 "register_operand" "=w,r")
- (vec_select:DI
- (match_operand:V2DI 1 "register_operand" "w,w")
- (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
- "TARGET_SIMD"
"@
- dup\\t%<v>0<Vmtype>, %1.<Vetype>[%2]
- umov\t%0, %1.d[%2]"
- [(set_attr "simd_type" "simd_dup")
- (set_attr "simd_mode" "DI")]
+ dup\\t%<Vetype>0, %1.<Vetype>[%2]
+ umov\\t%<vw>0, %1.<Vetype>[%2]"
+ [(set_attr "simd_type" "simd_dup, simd_movgp")
+ (set_attr "simd_mode" "<MODE>")]
)
(define_insn "aarch64_simd_dup<mode>"
@@ -409,7 +398,7 @@
case 4: return "ins\t%0.d[0], %1";
case 5: return "mov\t%0, %1";
case 6:
- return aarch64_output_simd_mov_immediate (&operands[1],
+ return aarch64_output_simd_mov_immediate (operands[1],
<MODE>mode, 64);
default: gcc_unreachable ();
}
@@ -440,7 +429,7 @@
case 5:
return "#";
case 6:
- return aarch64_output_simd_mov_immediate (&operands[1], <MODE>mode, 128);
+ return aarch64_output_simd_mov_immediate (operands[1], <MODE>mode, 128);
default:
gcc_unreachable ();
}
@@ -1058,9 +1047,9 @@
(vec_duplicate:<VHALF> (const_int 0))))]
"TARGET_SIMD"
"@
- mov\\t%d0, %d1
- fmov\t%d0, %1
- dup\t%d0, %1"
+ dup\\t%d0, %1.d[0]
+ fmov\\t%d0, %1
+ dup\\t%d0, %1"
[(set_attr "v8type" "*,fmov,*")
(set_attr "simd_type" "simd_dup,*,simd_dup")
(set_attr "simd_mode" "<MODE>")
@@ -1190,6 +1179,104 @@
;; Widening arithmetic.
+(define_insn "*aarch64_<su>mlal_lo<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal_hi<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))
+ (match_operand:<VWIDE> 1 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_lo<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_lo_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))))]
+ "TARGET_SIMD"
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vhalftype>, %4.<Vhalftype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl_hi<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 2 "register_operand" "w")
+ (match_operand:VQW 3 "vect_par_cnst_hi_half" "")))
+ (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
+ (match_operand:VQW 4 "register_operand" "w")
+ (match_dup 3))))))]
+ "TARGET_SIMD"
+ "<su>mlsl2\t%0.<Vwtype>, %2.<Vtype>, %4.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlal<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (plus:<VWIDE>
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 1 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 2 "register_operand" "w")))
+ (match_operand:<VWIDE> 3 "register_operand" "0")))]
+ "TARGET_SIMD"
+ "<su>mlal\t%0.<Vwtype>, %1.<Vtype>, %2.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
+(define_insn "*aarch64_<su>mlsl<mode>"
+ [(set (match_operand:<VWIDE> 0 "register_operand" "=w")
+ (minus:<VWIDE>
+ (match_operand:<VWIDE> 1 "register_operand" "0")
+ (mult:<VWIDE>
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 2 "register_operand" "w"))
+ (ANY_EXTEND:<VWIDE>
+ (match_operand:VDW 3 "register_operand" "w")))))]
+ "TARGET_SIMD"
+ "<su>mlsl\t%0.<Vwtype>, %2.<Vtype>, %3.<Vtype>"
+ [(set_attr "simd_type" "simd_mlal")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
(define_insn "aarch64_simd_vec_<su>mult_lo_<mode>"
[(set (match_operand:<VWIDE> 0 "register_operand" "=w")
(mult:<VWIDE> (ANY_EXTEND:<VWIDE> (vec_select:<VHALF>
@@ -1611,6 +1698,15 @@
DONE;
})
+(define_insn "clz<mode>2"
+ [(set (match_operand:VDQ_BHSI 0 "register_operand" "=w")
+ (clz:VDQ_BHSI (match_operand:VDQ_BHSI 1 "register_operand" "w")))]
+ "TARGET_SIMD"
+ "clz\\t%0.<Vtype>, %1.<Vtype>"
+ [(set_attr "simd_type" "simd_cls")
+ (set_attr "simd_mode" "<MODE>")]
+)
+
;; 'across lanes' max and min ops.
(define_insn "reduc_<maxmin_uns>_<mode>"
@@ -2209,15 +2305,29 @@
(set_attr "simd_mode" "<MODE>")]
)
-(define_insn "aarch64_combine<mode>"
+(define_insn_and_split "aarch64_combine<mode>"
[(set (match_operand:<VDBL> 0 "register_operand" "=&w")
(vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
(match_operand:VDC 2 "register_operand" "w")))]
"TARGET_SIMD"
- "mov\\t%0.d[0], %1.d[0]\;ins\\t%0.d[1], %2.d[0]"
- [(set_attr "simd_type" "simd_ins")
- (set_attr "simd_mode" "<MODE>")]
-)
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ aarch64_split_simd_combine (operands[0], operands[1], operands[2]);
+ DONE;
+})
+
+(define_expand "aarch64_simd_combine<mode>"
+ [(set (match_operand:<VDBL> 0 "register_operand" "=&w")
+ (vec_concat:<VDBL> (match_operand:VDC 1 "register_operand" "w")
+ (match_operand:VDC 2 "register_operand" "w")))]
+ "TARGET_SIMD"
+ {
+ emit_insn (gen_move_lo_quad_<Vdbl> (operands[0], operands[1]));
+ emit_insn (gen_move_hi_quad_<Vdbl> (operands[0], operands[2]));
+ DONE;
+ })
;; <su><addsub>l<q>.
@@ -3280,7 +3390,8 @@
(COMPARISONS:DI
(match_operand:DI 1 "register_operand" "w,w,r")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,ZDz,r")
- )))]
+ )))
+ (clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"@
cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
@@ -3291,15 +3402,7 @@
happening in the 'w' constraint cases. */
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
- [(set (reg:CC CC_REGNUM)
- (compare:CC
- (match_dup 1)
- (match_dup 2)))
- (set (match_dup 0)
- (neg:DI
- (COMPARISONS:DI
- (match_operand 3 "cc_register" "")
- (const_int 0))))]
+ [(const_int 0)]
{
enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
@@ -3332,7 +3435,8 @@
(UCOMPARISONS:DI
(match_operand:DI 1 "register_operand" "w,r")
(match_operand:DI 2 "aarch64_simd_reg_or_zero" "w,r")
- )))]
+ )))
+ (clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"@
cm<n_optab>\t%d0, %d<cmp_1>, %d<cmp_2>
@@ -3342,17 +3446,9 @@
happening in the 'w' constraint cases. */
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
- [(set (reg:CC CC_REGNUM)
- (compare:CC
- (match_dup 1)
- (match_dup 2)))
- (set (match_dup 0)
- (neg:DI
- (UCOMPARISONS:DI
- (match_operand 3 "cc_register" "")
- (const_int 0))))]
+ [(const_int 0)]
{
- enum machine_mode mode = SELECT_CC_MODE (<CMP>, operands[1], operands[2]);
+ enum machine_mode mode = CCmode;
rtx cc_reg = aarch64_gen_compare_reg (<CMP>, operands[1], operands[2]);
rtx comparison = gen_rtx_<CMP> (mode, operands[1], operands[2]);
emit_insn (gen_cstoredi_neg (operands[0], comparison, cc_reg));
@@ -3385,7 +3481,8 @@
(and:DI
(match_operand:DI 1 "register_operand" "w,r")
(match_operand:DI 2 "register_operand" "w,r"))
- (const_int 0))))]
+ (const_int 0))))
+ (clobber (reg:CC CC_REGNUM))]
"TARGET_SIMD"
"@
cmtst\t%d0, %d1, %d2
@@ -3395,16 +3492,7 @@
happening in the 'w' constraint cases. */
&& GP_REGNUM_P (REGNO (operands[0]))
&& GP_REGNUM_P (REGNO (operands[1]))"
- [(set (reg:CC_NZ CC_REGNUM)
- (compare:CC_NZ
- (and:DI (match_dup 1)
- (match_dup 2))
- (const_int 0)))
- (set (match_dup 0)
- (neg:DI
- (ne:DI
- (match_operand 3 "cc_register" "")
- (const_int 0))))]
+ [(const_int 0)]
{
rtx and_tree = gen_rtx_AND (DImode, operands[1], operands[2]);
enum machine_mode mode = SELECT_CC_MODE (NE, and_tree, const0_rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index a0aff58668b..072f5401279 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -1,4 +1,5 @@
-/* Machine description for AArch64 architecture.
+
+1;3201;0c/* Machine description for AArch64 architecture.
Copyright (C) 2009-2013 Free Software Foundation, Inc.
Contributed by ARM Ltd.
@@ -87,6 +88,14 @@ struct aarch64_address_info {
enum aarch64_symbol_type symbol_type;
};
+struct simd_immediate_info
+{
+ rtx value;
+ int shift;
+ int element_width;
+ bool mvn;
+};
+
/* The current code model. */
enum aarch64_code_model aarch64_cmodel;
@@ -103,8 +112,6 @@ static bool aarch64_vfp_is_call_or_return_candidate (enum machine_mode,
static void aarch64_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
static void aarch64_override_options_after_change (void);
-static int aarch64_simd_valid_immediate (rtx, enum machine_mode, int, rtx *,
- int *, unsigned char *, int *, int *);
static bool aarch64_vector_mode_supported_p (enum machine_mode);
static unsigned bit_count (unsigned HOST_WIDE_INT);
static bool aarch64_const_vec_all_same_int_p (rtx,
@@ -524,13 +531,15 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
return;
}
+ case SYMBOL_TINY_ABSOLUTE:
+ emit_insn (gen_rtx_SET (Pmode, dest, imm));
+ return;
+
case SYMBOL_SMALL_GOT:
{
rtx tmp_reg = dest;
if (can_create_pseudo_p ())
- {
- tmp_reg = gen_reg_rtx (Pmode);
- }
+ tmp_reg = gen_reg_rtx (Pmode);
emit_move_insn (tmp_reg, gen_rtx_HIGH (Pmode, imm));
emit_insn (gen_ldr_got_small (dest, tmp_reg, imm));
return;
@@ -692,6 +701,49 @@ aarch64_split_128bit_move_p (rtx dst, rtx src)
|| ! (FP_REGNUM_P (REGNO (dst)) && FP_REGNUM_P (REGNO (src))));
}
+/* Split a complex SIMD combine. */
+
+void
+aarch64_split_simd_combine (rtx dst, rtx src1, rtx src2)
+{
+ enum machine_mode src_mode = GET_MODE (src1);
+ enum machine_mode dst_mode = GET_MODE (dst);
+
+ gcc_assert (VECTOR_MODE_P (dst_mode));
+
+ if (REG_P (dst) && REG_P (src1) && REG_P (src2))
+ {
+ rtx (*gen) (rtx, rtx, rtx);
+
+ switch (src_mode)
+ {
+ case V8QImode:
+ gen = gen_aarch64_simd_combinev8qi;
+ break;
+ case V4HImode:
+ gen = gen_aarch64_simd_combinev4hi;
+ break;
+ case V2SImode:
+ gen = gen_aarch64_simd_combinev2si;
+ break;
+ case V2SFmode:
+ gen = gen_aarch64_simd_combinev2sf;
+ break;
+ case DImode:
+ gen = gen_aarch64_simd_combinedi;
+ break;
+ case DFmode:
+ gen = gen_aarch64_simd_combinedf;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ emit_insn (gen (dst, src1, src2));
+ return;
+ }
+}
+
/* Split a complex SIMD move. */
void
@@ -738,10 +790,10 @@ aarch64_split_simd_move (rtx dst, rtx src)
}
static rtx
-aarch64_force_temporary (rtx x, rtx value)
+aarch64_force_temporary (enum machine_mode mode, rtx x, rtx value)
{
if (can_create_pseudo_p ())
- return force_reg (Pmode, value);
+ return force_reg (mode, value);
else
{
x = aarch64_emit_move (x, value);
@@ -753,15 +805,16 @@ aarch64_force_temporary (rtx x, rtx value)
static rtx
aarch64_add_offset (enum machine_mode mode, rtx temp, rtx reg, HOST_WIDE_INT offset)
{
- if (!aarch64_plus_immediate (GEN_INT (offset), DImode))
+ if (!aarch64_plus_immediate (GEN_INT (offset), mode))
{
rtx high;
/* Load the full offset into a register. This
might be improvable in the future. */
high = GEN_INT (offset);
offset = 0;
- high = aarch64_force_temporary (temp, high);
- reg = aarch64_force_temporary (temp, gen_rtx_PLUS (Pmode, high, reg));
+ high = aarch64_force_temporary (mode, temp, high);
+ reg = aarch64_force_temporary (mode, temp,
+ gen_rtx_PLUS (mode, high, reg));
}
return plus_constant (mode, reg, offset);
}
@@ -800,7 +853,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
&& targetm.cannot_force_const_mem (mode, imm))
{
gcc_assert(can_create_pseudo_p ());
- base = aarch64_force_temporary (dest, base);
+ base = aarch64_force_temporary (mode, dest, base);
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
aarch64_emit_move (dest, base);
return;
@@ -817,7 +870,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
if (offset != const0_rtx)
{
gcc_assert(can_create_pseudo_p ());
- base = aarch64_force_temporary (dest, base);
+ base = aarch64_force_temporary (mode, dest, base);
base = aarch64_add_offset (mode, NULL, base, INTVAL (offset));
aarch64_emit_move (dest, base);
return;
@@ -826,6 +879,7 @@ aarch64_expand_mov_immediate (rtx dest, rtx imm)
case SYMBOL_SMALL_TPREL:
case SYMBOL_SMALL_ABSOLUTE:
+ case SYMBOL_TINY_ABSOLUTE:
aarch64_load_symref_appropriately (dest, imm, sty);
return;
@@ -2634,12 +2688,14 @@ static bool
aarch64_cannot_force_const_mem (enum machine_mode mode ATTRIBUTE_UNUSED, rtx x)
{
rtx base, offset;
+
if (GET_CODE (x) == HIGH)
return true;
split_const (x, &base, &offset);
if (GET_CODE (base) == SYMBOL_REF || GET_CODE (base) == LABEL_REF)
- return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR) != SYMBOL_FORCE_TO_MEM);
+ return (aarch64_classify_symbol (base, SYMBOL_CONTEXT_ADR)
+ != SYMBOL_FORCE_TO_MEM);
return aarch64_tls_referenced_p (x);
}
@@ -3077,10 +3133,13 @@ aarch64_symbolic_address_p (rtx x)
/* Classify the base of symbolic expression X, given that X appears in
context CONTEXT. */
-static enum aarch64_symbol_type
-aarch64_classify_symbolic_expression (rtx x, enum aarch64_symbol_context context)
+
+enum aarch64_symbol_type
+aarch64_classify_symbolic_expression (rtx x,
+ enum aarch64_symbol_context context)
{
rtx offset;
+
split_const (x, &x, &offset);
return aarch64_classify_symbol (x, context);
}
@@ -3428,13 +3487,13 @@ aarch64_print_operand (FILE *f, rtx x, char code)
break;
case 'X':
- /* Print integer constant in hex. */
+ /* Print bottom 16 bits of integer constant in hex. */
if (GET_CODE (x) != CONST_INT)
{
output_operand_lossage ("invalid operand for '%%%c'", code);
return;
}
- asm_fprintf (f, "0x%wx", UINTVAL (x));
+ asm_fprintf (f, "0x%wx", UINTVAL (x) & 0xffff);
break;
case 'w':
@@ -5016,6 +5075,7 @@ aarch64_classify_tls_symbol (rtx x)
/* Return the method that should be used to access SYMBOL_REF or
LABEL_REF X in context CONTEXT. */
+
enum aarch64_symbol_type
aarch64_classify_symbol (rtx x,
enum aarch64_symbol_context context ATTRIBUTE_UNUSED)
@@ -5029,6 +5089,8 @@ aarch64_classify_symbol (rtx x,
case AARCH64_CMODEL_TINY_PIC:
case AARCH64_CMODEL_TINY:
+ return SYMBOL_TINY_ABSOLUTE;
+
case AARCH64_CMODEL_SMALL_PIC:
case AARCH64_CMODEL_SMALL:
return SYMBOL_SMALL_ABSOLUTE;
@@ -5038,70 +5100,46 @@ aarch64_classify_symbol (rtx x,
}
}
- gcc_assert (GET_CODE (x) == SYMBOL_REF);
-
- switch (aarch64_cmodel)
+ if (GET_CODE (x) == SYMBOL_REF)
{
- case AARCH64_CMODEL_LARGE:
- return SYMBOL_FORCE_TO_MEM;
-
- case AARCH64_CMODEL_TINY:
- case AARCH64_CMODEL_SMALL:
-
- /* This is needed to get DFmode, TImode constants to be loaded off
- the constant pool. Is it necessary to dump TImode values into
- the constant pool. We don't handle TImode constant loads properly
- yet and hence need to use the constant pool. */
- if (CONSTANT_POOL_ADDRESS_P (x))
+ if (aarch64_cmodel == AARCH64_CMODEL_LARGE
+ || CONSTANT_POOL_ADDRESS_P (x))
return SYMBOL_FORCE_TO_MEM;
if (aarch64_tls_symbol_p (x))
return aarch64_classify_tls_symbol (x);
- if (SYMBOL_REF_WEAK (x))
- return SYMBOL_FORCE_TO_MEM;
-
- return SYMBOL_SMALL_ABSOLUTE;
-
- case AARCH64_CMODEL_TINY_PIC:
- case AARCH64_CMODEL_SMALL_PIC:
-
- if (CONSTANT_POOL_ADDRESS_P (x))
- return SYMBOL_FORCE_TO_MEM;
+ switch (aarch64_cmodel)
+ {
+ case AARCH64_CMODEL_TINY:
+ if (SYMBOL_REF_WEAK (x))
+ return SYMBOL_FORCE_TO_MEM;
+ return SYMBOL_TINY_ABSOLUTE;
- if (aarch64_tls_symbol_p (x))
- return aarch64_classify_tls_symbol (x);
+ case AARCH64_CMODEL_SMALL:
+ if (SYMBOL_REF_WEAK (x))
+ return SYMBOL_FORCE_TO_MEM;
+ return SYMBOL_SMALL_ABSOLUTE;
- if (!aarch64_symbol_binds_local_p (x))
- return SYMBOL_SMALL_GOT;
+ case AARCH64_CMODEL_TINY_PIC:
+ if (!aarch64_symbol_binds_local_p (x))
+ return SYMBOL_SMALL_GOT;
+ return SYMBOL_TINY_ABSOLUTE;
- return SYMBOL_SMALL_ABSOLUTE;
+ case AARCH64_CMODEL_SMALL_PIC:
+ if (!aarch64_symbol_binds_local_p (x))
+ return SYMBOL_SMALL_GOT;
+ return SYMBOL_SMALL_ABSOLUTE;
- default:
- gcc_unreachable ();
+ default:
+ gcc_unreachable ();
+ }
}
+
/* By default push everything into the constant pool. */
return SYMBOL_FORCE_TO_MEM;
}
-/* Return true if X is a symbolic constant that can be used in context
- CONTEXT. If it is, store the type of the symbol in *SYMBOL_TYPE. */
-
-bool
-aarch64_symbolic_constant_p (rtx x, enum aarch64_symbol_context context,
- enum aarch64_symbol_type *symbol_type)
-{
- rtx offset;
- split_const (x, &x, &offset);
- if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF)
- *symbol_type = aarch64_classify_symbol (x, context);
- else
- return false;
-
- /* No checking of offset at this point. */
- return true;
-}
-
bool
aarch64_constant_address_p (rtx x)
{
@@ -5152,8 +5190,7 @@ aarch64_legitimate_constant_p (enum machine_mode mode, rtx x)
/* This could probably go away because
we now decompose CONST_INTs according to expand_mov_immediate. */
if ((GET_CODE (x) == CONST_VECTOR
- && aarch64_simd_valid_immediate (x, mode, false,
- NULL, NULL, NULL, NULL, NULL) != -1)
+ && aarch64_simd_valid_immediate (x, mode, false, NULL))
|| CONST_INT_P (x) || aarch64_valid_floating_const (mode, x))
return !targetm.cannot_force_const_mem (mode, x);
@@ -5984,32 +6021,57 @@ aarch64_vector_mode_supported_p (enum machine_mode mode)
return false;
}
-/* Return quad mode as the preferred SIMD mode. */
+/* Return appropriate SIMD container
+ for MODE within a vector of WIDTH bits. */
static enum machine_mode
-aarch64_preferred_simd_mode (enum machine_mode mode)
+aarch64_simd_container_mode (enum machine_mode mode, unsigned width)
{
+ gcc_assert (width == 64 || width == 128);
if (TARGET_SIMD)
- switch (mode)
- {
- case DFmode:
- return V2DFmode;
- case SFmode:
- return V4SFmode;
- case SImode:
- return V4SImode;
- case HImode:
- return V8HImode;
- case QImode:
- return V16QImode;
- case DImode:
- return V2DImode;
- break;
-
- default:;
- }
+ {
+ if (width == 128)
+ switch (mode)
+ {
+ case DFmode:
+ return V2DFmode;
+ case SFmode:
+ return V4SFmode;
+ case SImode:
+ return V4SImode;
+ case HImode:
+ return V8HImode;
+ case QImode:
+ return V16QImode;
+ case DImode:
+ return V2DImode;
+ default:
+ break;
+ }
+ else
+ switch (mode)
+ {
+ case SFmode:
+ return V2SFmode;
+ case SImode:
+ return V2SImode;
+ case HImode:
+ return V4HImode;
+ case QImode:
+ return V8QImode;
+ default:
+ break;
+ }
+ }
return word_mode;
}
+/* Return 128-bit container as the preferred SIMD mode for MODE. */
+static enum machine_mode
+aarch64_preferred_simd_mode (enum machine_mode mode)
+{
+ return aarch64_simd_container_mode (mode, 128);
+}
+
/* Return the bitmask of possible vector sizes for the vectorizer
to iterate over. */
static unsigned int
@@ -6097,7 +6159,7 @@ aarch64_mangle_type (const_tree type)
}
/* Return the equivalent letter for size. */
-static unsigned char
+static char
sizetochar (int size)
{
switch (size)
@@ -6144,15 +6206,10 @@ aarch64_vect_float_const_representable_p (rtx x)
return aarch64_float_const_representable_p (x0);
}
-/* TODO: This function returns values similar to those
- returned by neon_valid_immediate in gcc/config/arm/arm.c
- but the API here is different enough that these magic numbers
- are not used. It should be sufficient to return true or false. */
-static int
-aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
- rtx *modconst, int *elementwidth,
- unsigned char *elementchar,
- int *mvn, int *shift)
+/* Return true for valid and false for invalid. */
+bool
+aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, bool inverse,
+ struct simd_immediate_info *info)
{
#define CHECK(STRIDE, ELSIZE, CLASS, TEST, SHIFT, NEG) \
matches = 1; \
@@ -6163,7 +6220,6 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
{ \
immtype = (CLASS); \
elsize = (ELSIZE); \
- elchar = sizetochar (elsize); \
eshift = (SHIFT); \
emvn = (NEG); \
break; \
@@ -6172,36 +6228,25 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
unsigned int i, elsize = 0, idx = 0, n_elts = CONST_VECTOR_NUNITS (op);
unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
unsigned char bytes[16];
- unsigned char elchar = 0;
int immtype = -1, matches;
unsigned int invmask = inverse ? 0xff : 0;
int eshift, emvn;
if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
{
- bool simd_imm_zero = aarch64_simd_imm_zero_p (op, mode);
- int elem_width = GET_MODE_BITSIZE (GET_MODE (CONST_VECTOR_ELT (op, 0)));
-
- if (!(simd_imm_zero
- || aarch64_vect_float_const_representable_p (op)))
- return -1;
-
- if (modconst)
- *modconst = CONST_VECTOR_ELT (op, 0);
-
- if (elementwidth)
- *elementwidth = elem_width;
-
- if (elementchar)
- *elementchar = sizetochar (elem_width);
+ if (! (aarch64_simd_imm_zero_p (op, mode)
+ || aarch64_vect_float_const_representable_p (op)))
+ return false;
- if (shift)
- *shift = 0;
+ if (info)
+ {
+ info->value = CONST_VECTOR_ELT (op, 0);
+ info->element_width = GET_MODE_BITSIZE (GET_MODE (info->value));
+ info->mvn = false;
+ info->shift = 0;
+ }
- if (simd_imm_zero)
- return 19;
- else
- return 18;
+ return true;
}
/* Splat vector constant out into a byte vector. */
@@ -6299,23 +6344,14 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
if (immtype == -1
|| (immtype >= 12 && immtype <= 15)
|| immtype == 18)
- return -1;
-
-
- if (elementwidth)
- *elementwidth = elsize;
-
- if (elementchar)
- *elementchar = elchar;
-
- if (mvn)
- *mvn = emvn;
-
- if (shift)
- *shift = eshift;
+ return false;
- if (modconst)
+ if (info)
{
+ info->element_width = elsize;
+ info->mvn = emvn != 0;
+ info->shift = eshift;
+
unsigned HOST_WIDE_INT imm = 0;
/* Un-invert bytes of recognized vector, if necessary. */
@@ -6332,68 +6368,27 @@ aarch64_simd_valid_immediate (rtx op, enum machine_mode mode, int inverse,
imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
<< (i * BITS_PER_UNIT);
- *modconst = GEN_INT (imm);
- }
- else
- {
- unsigned HOST_WIDE_INT imm = 0;
- for (i = 0; i < elsize / BITS_PER_UNIT; i++)
- imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
+ info->value = GEN_INT (imm);
+ }
+ else
+ {
+ for (i = 0; i < elsize / BITS_PER_UNIT; i++)
+ imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
/* Construct 'abcdefgh' because the assembler cannot handle
- generic constants. */
- gcc_assert (shift != NULL && mvn != NULL);
- if (*mvn)
+ generic constants. */
+ if (info->mvn)
imm = ~imm;
- imm = (imm >> *shift) & 0xff;
- *modconst = GEN_INT (imm);
- }
+ imm = (imm >> info->shift) & 0xff;
+ info->value = GEN_INT (imm);
+ }
}
- return immtype;
+ return true;
#undef CHECK
}
-/* Return TRUE if rtx X is legal for use as either a AdvSIMD MOVI instruction
- (or, implicitly, MVNI) immediate. Write back width per element
- to *ELEMENTWIDTH, and a modified constant (whatever should be output
- for a MOVI instruction) in *MODCONST. */
-int
-aarch64_simd_immediate_valid_for_move (rtx op, enum machine_mode mode,
- rtx *modconst, int *elementwidth,
- unsigned char *elementchar,
- int *mvn, int *shift)
-{
- rtx tmpconst;
- int tmpwidth;
- unsigned char tmpwidthc;
- int tmpmvn = 0, tmpshift = 0;
- int retval = aarch64_simd_valid_immediate (op, mode, 0, &tmpconst,
- &tmpwidth, &tmpwidthc,
- &tmpmvn, &tmpshift);
-
- if (retval == -1)
- return 0;
-
- if (modconst)
- *modconst = tmpconst;
-
- if (elementwidth)
- *elementwidth = tmpwidth;
-
- if (elementchar)
- *elementchar = tmpwidthc;
-
- if (mvn)
- *mvn = tmpmvn;
-
- if (shift)
- *shift = tmpshift;
-
- return 1;
-}
-
static bool
aarch64_const_vec_all_same_int_p (rtx x,
HOST_WIDE_INT minval,
@@ -6455,6 +6450,25 @@ aarch64_simd_imm_scalar_p (rtx x, enum machine_mode mode ATTRIBUTE_UNUSED)
return true;
}
+bool
+aarch64_mov_operand_p (rtx x,
+ enum aarch64_symbol_context context,
+ enum machine_mode mode)
+{
+ if (GET_CODE (x) == HIGH
+ && aarch64_valid_symref (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
+ return true;
+
+ if (CONST_INT_P (x) && aarch64_move_imm (INTVAL (x), mode))
+ return true;
+
+ if (GET_CODE (x) == SYMBOL_REF && mode == DImode && CONSTANT_ADDRESS_P (x))
+ return true;
+
+ return aarch64_classify_symbolic_expression (x, context)
+ == SYMBOL_TINY_ABSOLUTE;
+}
+
/* Return a const_int vector of VAL. */
rtx
aarch64_simd_gen_const_vector_dup (enum machine_mode mode, int val)
@@ -6479,9 +6493,7 @@ aarch64_simd_scalar_immediate_valid_for_move (rtx op, enum machine_mode mode)
gcc_assert (!VECTOR_MODE_P (mode));
vmode = aarch64_preferred_simd_mode (mode);
rtx op_v = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (op));
- int retval = aarch64_simd_immediate_valid_for_move (op_v, vmode, 0,
- NULL, NULL, NULL, NULL);
- return retval;
+ return aarch64_simd_valid_immediate (op_v, vmode, false, NULL);
}
/* Construct and return a PARALLEL RTX vector. */
@@ -6709,8 +6721,7 @@ aarch64_simd_make_constant (rtx vals)
gcc_unreachable ();
if (const_vec != NULL_RTX
- && aarch64_simd_immediate_valid_for_move (const_vec, mode, NULL, NULL,
- NULL, NULL, NULL))
+ && aarch64_simd_valid_immediate (const_vec, mode, false, NULL))
/* Load using MOVI/MVNI. */
return const_vec;
else if ((const_dup = aarch64_simd_dup_constant (vals)) != NULL_RTX)
@@ -7268,49 +7279,78 @@ aarch64_float_const_representable_p (rtx x)
}
char*
-aarch64_output_simd_mov_immediate (rtx *const_vector,
+aarch64_output_simd_mov_immediate (rtx const_vector,
enum machine_mode mode,
unsigned width)
{
- int is_valid;
- unsigned char widthc;
- int lane_width_bits;
+ bool is_valid;
static char templ[40];
- int shift = 0, mvn = 0;
const char *mnemonic;
unsigned int lane_count = 0;
+ char element_char;
+
+ struct simd_immediate_info info;
- is_valid =
- aarch64_simd_immediate_valid_for_move (*const_vector, mode,
- const_vector, &lane_width_bits,
- &widthc, &mvn, &shift);
+ /* This will return true to show const_vector is legal for use as either
+ a AdvSIMD MOVI instruction (or, implicitly, MVNI) immediate. It will
+ also update INFO to show how the immediate should be generated. */
+ is_valid = aarch64_simd_valid_immediate (const_vector, mode, false, &info);
gcc_assert (is_valid);
+ element_char = sizetochar (info.element_width);
+ lane_count = width / info.element_width;
+
mode = GET_MODE_INNER (mode);
if (mode == SFmode || mode == DFmode)
{
- bool zero_p =
- aarch64_float_const_zero_rtx_p (*const_vector);
- gcc_assert (shift == 0);
- mnemonic = zero_p ? "movi" : "fmov";
+ gcc_assert (info.shift == 0 && ! info.mvn);
+ if (aarch64_float_const_zero_rtx_p (info.value))
+ info.value = GEN_INT (0);
+ else
+ {
+#define buf_size 20
+ REAL_VALUE_TYPE r;
+ REAL_VALUE_FROM_CONST_DOUBLE (r, info.value);
+ char float_buf[buf_size] = {'\0'};
+ real_to_decimal_for_mode (float_buf, &r, buf_size, buf_size, 1, mode);
+#undef buf_size
+
+ if (lane_count == 1)
+ snprintf (templ, sizeof (templ), "fmov\t%%d0, %s", float_buf);
+ else
+ snprintf (templ, sizeof (templ), "fmov\t%%0.%d%c, %s",
+ lane_count, element_char, float_buf);
+ return templ;
+ }
}
- else
- mnemonic = mvn ? "mvni" : "movi";
- gcc_assert (lane_width_bits != 0);
- lane_count = width / lane_width_bits;
+ mnemonic = info.mvn ? "mvni" : "movi";
if (lane_count == 1)
- snprintf (templ, sizeof (templ), "%s\t%%d0, %%1", mnemonic);
- else if (shift)
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1, lsl %d",
- mnemonic, lane_count, widthc, shift);
+ snprintf (templ, sizeof (templ), "%s\t%%d0, " HOST_WIDE_INT_PRINT_HEX,
+ mnemonic, UINTVAL (info.value));
+ else if (info.shift)
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX
+ ", lsl %d", mnemonic, lane_count, element_char,
+ UINTVAL (info.value), info.shift);
else
- snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, %%1",
- mnemonic, lane_count, widthc);
+ snprintf (templ, sizeof (templ), "%s\t%%0.%d%c, " HOST_WIDE_INT_PRINT_HEX,
+ mnemonic, lane_count, element_char, UINTVAL (info.value));
return templ;
}
+char*
+aarch64_output_scalar_simd_mov_immediate (rtx immediate,
+ enum machine_mode mode)
+{
+ enum machine_mode vmode;
+
+ gcc_assert (!VECTOR_MODE_P (mode));
+ vmode = aarch64_simd_container_mode (mode, 64);
+ rtx v_op = aarch64_simd_gen_const_vector_dup (vmode, INTVAL (immediate));
+ return aarch64_output_simd_mov_immediate (v_op, vmode, 64);
+}
+
/* Split operands into moves from op[1] + op[2] into op[0]. */
void
diff --git a/gcc/config/aarch64/aarch64.md b/gcc/config/aarch64/aarch64.md
index b27bcdaa97c..e88e5be894e 100644
--- a/gcc/config/aarch64/aarch64.md
+++ b/gcc/config/aarch64/aarch64.md
@@ -774,17 +774,34 @@
(match_operand:SHORT 1 "general_operand" " r,M,D<hq>,m, m,rZ,*w,*w, r,*w"))]
"(register_operand (operands[0], <MODE>mode)
|| aarch64_reg_or_zero (operands[1], <MODE>mode))"
- "@
- mov\\t%w0, %w1
- mov\\t%w0, %1
- movi\\t%0.<Vallxd>, %1
- ldr<size>\\t%w0, %1
- ldr\\t%<size>0, %1
- str<size>\\t%w1, %0
- str\\t%<size>1, %0
- umov\\t%w0, %1.<v>[0]
- dup\\t%0.<Vallxd>, %w1
- dup\\t%0, %1.<v>[0]"
+{
+ switch (which_alternative)
+ {
+ case 0:
+ return "mov\t%w0, %w1";
+ case 1:
+ return "mov\t%w0, %1";
+ case 2:
+ return aarch64_output_scalar_simd_mov_immediate (operands[1],
+ <MODE>mode);
+ case 3:
+ return "ldr<size>\t%w0, %1";
+ case 4:
+ return "ldr\t%<size>0, %1";
+ case 5:
+ return "str<size>\t%w1, %0";
+ case 6:
+ return "str\t%<size>1, %0";
+ case 7:
+ return "umov\t%w0, %1.<v>[0]";
+ case 8:
+ return "dup\t%0.<Vallxd>, %w1";
+ case 9:
+ return "dup\t%0, %1.<v>[0]";
+ default:
+ gcc_unreachable ();
+ }
+}
[(set_attr "v8type" "move,alu,alu,load1,load1,store1,store1,*,*,*")
(set_attr "simd_type" "*,*,simd_move_imm,*,*,*,*,simd_movgp,simd_dupgp,simd_dup")
(set_attr "simd" "*,*,yes,*,*,*,*,yes,yes,yes")
@@ -829,8 +846,8 @@
)
(define_insn "*movdi_aarch64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r, r, *w, r,*w,w")
- (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,Usa,Ush,rZ,*w,*w,Dd"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=r,k,r,r,r,*w,m, m,r,r, *w, r,*w,w")
+ (match_operand:DI 1 "aarch64_mov_operand" " r,r,k,N,m, m,rZ,*w,S,Ush,rZ,*w,*w,Dd"))]
"(register_operand (operands[0], DImode)
|| aarch64_reg_or_zero (operands[1], DImode))"
"@
@@ -850,7 +867,8 @@
movi\\t%d0, %1"
[(set_attr "v8type" "move,move,move,alu,load1,load1,store1,store1,adr,adr,fmov,fmov,fmov,fmov")
(set_attr "mode" "DI")
- (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,yes")]
+ (set_attr "fp" "*,*,*,*,*,yes,*,yes,*,*,yes,yes,yes,*")
+ (set_attr "simd" "*,*,*,*,*,*,*,*,*,*,*,*,*,yes")]
)
(define_insn "insv_imm<mode>"
@@ -858,9 +876,8 @@
(const_int 16)
(match_operand:GPI 1 "const_int_operand" "n"))
(match_operand:GPI 2 "const_int_operand" "n"))]
- "INTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
- && INTVAL (operands[1]) % 16 == 0
- && UINTVAL (operands[2]) <= 0xffff"
+ "UINTVAL (operands[1]) < GET_MODE_BITSIZE (<MODE>mode)
+ && UINTVAL (operands[1]) % 16 == 0"
"movk\\t%<w>0, %X2, lsl %1"
[(set_attr "v8type" "movk")
(set_attr "mode" "<MODE>")]
@@ -3164,6 +3181,50 @@
(set_attr "mode" "<MODE>")]
)
+;; Bitfield Insert (insv)
+(define_expand "insv<mode>"
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand")
+ (match_operand 1 "const_int_operand")
+ (match_operand 2 "const_int_operand"))
+ (match_operand:GPI 3 "general_operand"))]
+ ""
+{
+ unsigned HOST_WIDE_INT width = UINTVAL (operands[1]);
+ unsigned HOST_WIDE_INT pos = UINTVAL (operands[2]);
+ rtx value = operands[3];
+
+ if (width == 0 || (pos + width) > GET_MODE_BITSIZE (<MODE>mode))
+ FAIL;
+
+ if (CONST_INT_P (value))
+ {
+ unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT)1 << width) - 1;
+
+ /* Prefer AND/OR for inserting all zeros or all ones. */
+ if ((UINTVAL (value) & mask) == 0
+ || (UINTVAL (value) & mask) == mask)
+ FAIL;
+
+ /* 16-bit aligned 16-bit wide insert is handled by insv_imm. */
+ if (width == 16 && (pos % 16) == 0)
+ DONE;
+ }
+ operands[3] = force_reg (<MODE>mode, value);
+})
+
+(define_insn "*insv_reg<mode>"
+ [(set (zero_extract:GPI (match_operand:GPI 0 "register_operand" "+r")
+ (match_operand 1 "const_int_operand" "n")
+ (match_operand 2 "const_int_operand" "n"))
+ (match_operand:GPI 3 "register_operand" "r"))]
+ "!(UINTVAL (operands[1]) == 0
+ || (UINTVAL (operands[2]) + UINTVAL (operands[1])
+ > GET_MODE_BITSIZE (<MODE>mode)))"
+ "bfi\\t%<w>0, %<w>3, %2, %1"
+ [(set_attr "v8type" "bfm")
+ (set_attr "mode" "<MODE>")]
+)
+
(define_insn "*<optab><ALLX:mode>_shft_<GPI:mode>"
[(set (match_operand:GPI 0 "register_operand" "=r")
(ashift:GPI (ANY_EXTEND:GPI
diff --git a/gcc/config/aarch64/arm_neon.h b/gcc/config/aarch64/arm_neon.h
index 608db35b3dd..760ba3dc1e1 100644
--- a/gcc/config/aarch64/arm_neon.h
+++ b/gcc/config/aarch64/arm_neon.h
@@ -20234,49 +20234,49 @@ vcvtpq_u64_f64 (float64x2_t __a)
__extension__ static __inline int8x1_t __attribute__ ((__always_inline__))
vdupb_lane_s8 (int8x16_t a, int const b)
{
- return __builtin_aarch64_dup_laneqi (a, b);
+ return __builtin_aarch64_dup_lane_scalarv16qi (a, b);
}
__extension__ static __inline uint8x1_t __attribute__ ((__always_inline__))
vdupb_lane_u8 (uint8x16_t a, int const b)
{
- return (uint8x1_t) __builtin_aarch64_dup_laneqi ((int8x16_t) a, b);
+ return (uint8x1_t) __builtin_aarch64_dup_lane_scalarv16qi ((int8x16_t) a, b);
}
__extension__ static __inline int16x1_t __attribute__ ((__always_inline__))
vduph_lane_s16 (int16x8_t a, int const b)
{
- return __builtin_aarch64_dup_lanehi (a, b);
+ return __builtin_aarch64_dup_lane_scalarv8hi (a, b);
}
__extension__ static __inline uint16x1_t __attribute__ ((__always_inline__))
vduph_lane_u16 (uint16x8_t a, int const b)
{
- return (uint16x1_t) __builtin_aarch64_dup_lanehi ((int16x8_t) a, b);
+ return (uint16x1_t) __builtin_aarch64_dup_lane_scalarv8hi ((int16x8_t) a, b);
}
__extension__ static __inline int32x1_t __attribute__ ((__always_inline__))
vdups_lane_s32 (int32x4_t a, int const b)
{
- return __builtin_aarch64_dup_lanesi (a, b);
+ return __builtin_aarch64_dup_lane_scalarv4si (a, b);
}
__extension__ static __inline uint32x1_t __attribute__ ((__always_inline__))
vdups_lane_u32 (uint32x4_t a, int const b)
{
- return (uint32x1_t) __builtin_aarch64_dup_lanesi ((int32x4_t) a, b);
+ return (uint32x1_t) __builtin_aarch64_dup_lane_scalarv4si ((int32x4_t) a, b);
}
__extension__ static __inline int64x1_t __attribute__ ((__always_inline__))
vdupd_lane_s64 (int64x2_t a, int const b)
{
- return __builtin_aarch64_dup_lanedi (a, b);
+ return __builtin_aarch64_dup_lane_scalarv2di (a, b);
}
__extension__ static __inline uint64x1_t __attribute__ ((__always_inline__))
vdupd_lane_u64 (uint64x2_t a, int const b)
{
- return (uint64x1_t) __builtin_aarch64_dup_lanedi ((int64x2_t) a, b);
+ return (uint64x1_t) __builtin_aarch64_dup_lane_scalarv2di ((int64x2_t) a, b);
}
/* vldn */
diff --git a/gcc/config/aarch64/constraints.md b/gcc/config/aarch64/constraints.md
index 18ac16a3160..7cafc08fdd9 100644
--- a/gcc/config/aarch64/constraints.md
+++ b/gcc/config/aarch64/constraints.md
@@ -75,11 +75,6 @@
"Integer constant zero."
(match_test "op == const0_rtx"))
-(define_constraint "Usa"
- "A constraint that matches an absolute symbolic address."
- (and (match_code "const,symbol_ref")
- (match_test "aarch64_symbolic_address_p (op)")))
-
(define_constraint "Ush"
"A constraint that matches an absolute symbolic address high part."
(and (match_code "high")
@@ -148,9 +143,8 @@
"@internal
A constraint that matches vector of immediates."
(and (match_code "const_vector")
- (match_test "aarch64_simd_immediate_valid_for_move (op, GET_MODE (op),
- NULL, NULL, NULL,
- NULL, NULL) != 0")))
+ (match_test "aarch64_simd_valid_immediate (op, GET_MODE (op),
+ false, NULL)")))
(define_constraint "Dh"
"@internal
diff --git a/gcc/config/aarch64/iterators.md b/gcc/config/aarch64/iterators.md
index 860d4d9a187..8e40c5de5d4 100644
--- a/gcc/config/aarch64/iterators.md
+++ b/gcc/config/aarch64/iterators.md
@@ -385,7 +385,8 @@
;; Double modes of vector modes (lower case).
(define_mode_attr Vdbl [(V8QI "v16qi") (V4HI "v8hi")
(V2SI "v4si") (V2SF "v4sf")
- (SI "v2si") (DI "v2di")])
+ (SI "v2si") (DI "v2di")
+ (DF "v2df")])
;; Narrowed modes for VDN.
(define_mode_attr VNARROWD [(V4HI "V8QI") (V2SI "V4HI")
diff --git a/gcc/config/aarch64/predicates.md b/gcc/config/aarch64/predicates.md
index 8514e8f8fbd..3e2b6b34357 100644
--- a/gcc/config/aarch64/predicates.md
+++ b/gcc/config/aarch64/predicates.md
@@ -115,16 +115,11 @@
(match_test "aarch64_legitimate_address_p (mode, XEXP (op, 0), PARALLEL,
0)")))
-(define_predicate "aarch64_const_address"
- (and (match_code "symbol_ref")
- (match_test "mode == DImode && CONSTANT_ADDRESS_P (op)")))
-
(define_predicate "aarch64_valid_symref"
(match_code "const, symbol_ref, label_ref")
{
- enum aarch64_symbol_type symbol_type;
- return (aarch64_symbolic_constant_p (op, SYMBOL_CONTEXT_ADR, &symbol_type)
- && symbol_type != SYMBOL_FORCE_TO_MEM);
+ return (aarch64_classify_symbolic_expression (op, SYMBOL_CONTEXT_ADR)
+ != SYMBOL_FORCE_TO_MEM);
})
(define_predicate "aarch64_tls_ie_symref"
@@ -170,15 +165,10 @@
})
(define_predicate "aarch64_mov_operand"
- (and (match_code "reg,subreg,mem,const_int,symbol_ref,high")
+ (and (match_code "reg,subreg,mem,const,const_int,symbol_ref,label_ref,high")
(ior (match_operand 0 "register_operand")
(ior (match_operand 0 "memory_operand")
- (ior (match_test "GET_CODE (op) == HIGH
- && aarch64_valid_symref (XEXP (op, 0),
- GET_MODE (XEXP (op, 0)))")
- (ior (match_test "CONST_INT_P (op)
- && aarch64_move_imm (INTVAL (op), mode)")
- (match_test "aarch64_const_address (op, mode)")))))))
+ (match_test "aarch64_mov_operand_p (op, SYMBOL_CONTEXT_ADR, mode)")))))
(define_predicate "aarch64_movti_operand"
(and (match_code "reg,subreg,mem,const_int")
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 69007a93385..5f5b33e347b 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -2700,12 +2700,12 @@ alpha_emit_conditional_move (rtx cmp, enum machine_mode mode)
break;
case GE: case GT: case GEU: case GTU:
- /* These must be swapped. */
- if (op1 != CONST0_RTX (cmp_mode))
- {
- code = swap_condition (code);
- tem = op0, op0 = op1, op1 = tem;
- }
+ /* These normally need swapping, but for integer zero we have
+ special patterns that recognize swapped operands. */
+ if (cmp_mode == DImode && op1 == const0_rtx)
+ break;
+ code = swap_condition (code);
+ tem = op0, op0 = op1, op1 = tem;
break;
default:
@@ -3067,12 +3067,9 @@ alpha_emit_xfloating_compare (enum rtx_code *pcode, rtx op0, rtx op1)
operands[1] = op1;
out = gen_reg_rtx (DImode);
- /* What's actually returned is -1,0,1, not a proper boolean value,
- so use an EXPR_LIST as with a generic libcall instead of a
- comparison type expression. */
- note = gen_rtx_EXPR_LIST (VOIDmode, op1, NULL_RTX);
- note = gen_rtx_EXPR_LIST (VOIDmode, op0, note);
- note = gen_rtx_EXPR_LIST (VOIDmode, func, note);
+ /* What's actually returned is -1,0,1, not a proper boolean value. */
+ note = gen_rtx_fmt_ee (cmp_code, VOIDmode, op0, op1);
+ note = gen_rtx_UNSPEC (DImode, gen_rtvec (1, note), UNSPEC_XFLT_COMPARE);
alpha_emit_xfloating_libcall (func, out, operands, 2, note);
return out;
diff --git a/gcc/config/alpha/alpha.md b/gcc/config/alpha/alpha.md
index 439752780a0..b020b457df2 100644
--- a/gcc/config/alpha/alpha.md
+++ b/gcc/config/alpha/alpha.md
@@ -23,6 +23,7 @@
;; Uses of UNSPEC in this file:
(define_c_enum "unspec" [
+ UNSPEC_XFLT_COMPARE
UNSPEC_ARG_HOME
UNSPEC_LDGP1
UNSPEC_INSXH
diff --git a/gcc/config/arm/arm-fixed.md b/gcc/config/arm/arm-fixed.md
index 10da396ab66..12bbbaf9083 100644
--- a/gcc/config/arm/arm-fixed.md
+++ b/gcc/config/arm/arm-fixed.md
@@ -19,12 +19,13 @@
;; This file contains ARM instructions that support fixed-point operations.
(define_insn "add<mode>3"
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
- (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
- (match_operand:FIXED 2 "s_register_operand" "r")))]
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+ (plus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"add%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")])
(define_insn "add<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -32,7 +33,8 @@
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"sadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "usadd<mode>3"
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -40,7 +42,8 @@
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"uqadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "ssadd<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -48,15 +51,17 @@
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"qadd<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "sub<mode>3"
- [(set (match_operand:FIXED 0 "s_register_operand" "=r")
- (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "r")
- (match_operand:FIXED 2 "s_register_operand" "r")))]
+ [(set (match_operand:FIXED 0 "s_register_operand" "=l,r")
+ (minus:FIXED (match_operand:FIXED 1 "s_register_operand" "l,r")
+ (match_operand:FIXED 2 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"sub%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")])
(define_insn "sub<mode>3"
[(set (match_operand:ADDSUB 0 "s_register_operand" "=r")
@@ -64,7 +69,8 @@
(match_operand:ADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"ssub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "ussub<mode>3"
[(set (match_operand:UQADDSUB 0 "s_register_operand" "=r")
@@ -73,7 +79,8 @@
(match_operand:UQADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"uqsub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "sssub<mode>3"
[(set (match_operand:QADDSUB 0 "s_register_operand" "=r")
@@ -81,7 +88,8 @@
(match_operand:QADDSUB 2 "s_register_operand" "r")))]
"TARGET_INT_SIMD"
"qsub<qaddsub_suf>%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
;; Fractional multiplies.
@@ -374,6 +382,7 @@
"TARGET_32BIT && arm_arch6"
"ssat%?\\t%0, #16, %2%S1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "insn" "sat")
(set_attr "shift" "1")
(set_attr "type" "alu_shift")])
@@ -384,4 +393,5 @@
"TARGET_INT_SIMD"
"usat%?\\t%0, #16, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "insn" "sat")])
diff --git a/gcc/config/arm/arm-generic.md b/gcc/config/arm/arm-generic.md
index 9705f751ae6..8a3335055d1 100644
--- a/gcc/config/arm/arm-generic.md
+++ b/gcc/config/arm/arm-generic.md
@@ -114,7 +114,9 @@
(define_insn_reservation "mult" 16
(and (eq_attr "generic_sched" "yes")
- (and (eq_attr "ldsched" "no") (eq_attr "type" "mult")))
+ (and (eq_attr "ldsched" "no")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes"))))
"core*16")
(define_insn_reservation "mult_ldsched_strongarm" 3
@@ -122,7 +124,8 @@
(and (eq_attr "ldsched" "yes")
(and (eq_attr "tune"
"strongarm,strongarm110,strongarm1100,strongarm1110")
- (eq_attr "type" "mult"))))
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))))
"core*2")
(define_insn_reservation "mult_ldsched" 4
@@ -130,13 +133,17 @@
(and (eq_attr "ldsched" "yes")
(and (eq_attr "tune"
"!strongarm,strongarm110,strongarm1100,strongarm1110")
- (eq_attr "type" "mult"))))
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))))
"core*4")
(define_insn_reservation "multi_cycle" 32
(and (eq_attr "generic_sched" "yes")
(and (eq_attr "core_cycles" "multi")
- (eq_attr "type" "!mult,load_byte,load1,load2,load3,load4,store1,store2,store3,store4")))
+ (and (eq_attr "type" "!load_byte,load1,load2,load3,load4,\
+ store1,store2,store3,store4")
+ (not (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes"))))))
"core*32")
(define_insn_reservation "single_cycle" 1
diff --git a/gcc/config/arm/arm-ldmstm.ml b/gcc/config/arm/arm-ldmstm.ml
index 2bc9702bee2..e615437b125 100644
--- a/gcc/config/arm/arm-ldmstm.ml
+++ b/gcc/config/arm/arm-ldmstm.ml
@@ -146,12 +146,15 @@ let can_thumb addrmode update is_store =
| IA, true, true -> true
| _ -> false
+exception InvalidAddrMode of string;;
+
let target addrmode thumb =
match addrmode, thumb with
IA, true -> "TARGET_THUMB1"
| IA, false -> "TARGET_32BIT"
| DB, false -> "TARGET_32BIT"
| _, false -> "TARGET_ARM"
+ | _, _ -> raise (InvalidAddrMode "ERROR: Invalid Addressing mode for Thumb1.")
let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
let astr = string_of_addrmode addrmode in
@@ -181,8 +184,10 @@ let write_pattern_1 name ls addrmode nregs write_set_fn update thumb =
done;
Printf.printf "}\"\n";
Printf.printf " [(set_attr \"type\" \"%s%d\")" ls nregs;
- begin if not thumb then
+ if not thumb then begin
Printf.printf "\n (set_attr \"predicable\" \"yes\")";
+ if addrmode == IA || addrmode == DB then
+ Printf.printf "\n (set_attr \"predicable_short_it\" \"no\")";
end;
Printf.printf "])\n\n"
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index c791341f69b..ef94bbcea25 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -24,12 +24,13 @@
extern enum unwind_info_type arm_except_unwind_info (struct gcc_options *);
extern int use_return_insn (int, rtx);
+extern bool use_simple_return_p (void);
extern enum reg_class arm_regno_class (int);
extern void arm_load_pic_register (unsigned long);
extern int arm_volatile_func (void);
extern void arm_expand_prologue (void);
extern void arm_expand_epilogue (bool);
-extern void thumb2_expand_return (void);
+extern void thumb2_expand_return (bool);
extern const char *arm_strip_name_encoding (const char *);
extern void arm_asm_output_labelref (FILE *, const char *);
extern void thumb2_asm_output_opcode (FILE *);
@@ -94,7 +95,7 @@ extern enum reg_class coproc_secondary_reload_class (enum machine_mode, rtx,
extern bool arm_tls_referenced_p (rtx);
extern int arm_coproc_mem_operand (rtx, bool);
-extern int neon_vector_mem_operand (rtx, int);
+extern int neon_vector_mem_operand (rtx, int, bool);
extern int neon_struct_mem_operand (rtx);
extern int arm_no_early_store_addr_dep (rtx, rtx);
extern int arm_early_store_addr_dep (rtx, rtx);
@@ -227,6 +228,8 @@ extern const char *arm_mangle_type (const_tree);
extern void arm_order_regs_for_local_alloc (void);
+extern int arm_max_conditional_execute ();
+
/* Vectorizer cost model implementation. */
struct cpu_vec_costs {
const int scalar_stmt_cost; /* Cost of any scalar operation, excluding
@@ -256,8 +259,7 @@ struct tune_params
bool (*rtx_costs) (rtx, RTX_CODE, RTX_CODE, int *, bool);
bool (*sched_adjust_cost) (rtx, rtx, rtx, int *);
int constant_limit;
- /* Maximum number of instructions to conditionalise in
- arm_final_prescan_insn. */
+ /* Maximum number of instructions to conditionalise. */
int max_insns_skipped;
int num_prefetch_slots;
int l1_cache_size;
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index 44286926eb6..e6fd42079cb 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -23,6 +23,7 @@
#include "config.h"
#include "system.h"
#include "coretypes.h"
+#include "hash-table.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
@@ -661,6 +662,10 @@ static const struct attribute_spec arm_attribute_table[] =
#undef TARGET_ASAN_SHADOW_OFFSET
#define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
+#undef MAX_INSN_PER_IT_BLOCK
+#define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
+
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Obstack for minipool constant handling. */
@@ -1054,7 +1059,7 @@ const struct tune_params arm_cortex_a15_tune =
arm_9e_rtx_costs,
NULL,
1, /* Constant limit. */
- 5, /* Max cond insns. */
+ 2, /* Max cond insns. */
ARM_PREFETCH_NOT_BENEFICIAL,
false, /* Prefer constant pool. */
arm_default_branch_cost,
@@ -1870,6 +1875,11 @@ arm_option_override (void)
arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
+ if (arm_restrict_it == 2)
+ arm_restrict_it = arm_arch8 && TARGET_THUMB2;
+
+ if (!TARGET_THUMB2)
+ arm_restrict_it = 0;
/* If we are not using the default (ARM mode) section anchor offset
ranges, then set the correct ranges now. */
@@ -2168,6 +2178,14 @@ arm_option_override (void)
global_options.x_param_values,
global_options_set.x_param_values);
+ /* Disable shrink-wrap when optimizing function for size, since it tends to
+ generate additional returns. */
+ if (optimize_function_for_size_p (cfun) && TARGET_THUMB2)
+ flag_shrink_wrap = false;
+ /* TBD: Dwarf info for apcs frame is not handled yet. */
+ if (TARGET_APCS_FRAME)
+ flag_shrink_wrap = false;
+
/* Register global variables with the garbage collector. */
arm_add_gc_roots ();
}
@@ -2517,6 +2535,18 @@ use_return_insn (int iscond, rtx sibling)
return 1;
}
+/* Return TRUE if we should try to use a simple_return insn, i.e. perform
+ shrink-wrapping if possible. This is the case if we need to emit a
+ prologue, which we can test by looking at the offsets. */
+bool
+use_simple_return_p (void)
+{
+ arm_stack_offsets *offsets;
+
+ offsets = arm_get_frame_offsets ();
+ return offsets->outgoing_args != 0;
+}
+
/* Return TRUE if int I is a valid immediate ARM constant. */
int
@@ -2656,6 +2686,8 @@ const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
switch (code)
{
case AND:
+ case IOR:
+ case XOR:
return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
&& (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
case PLUS:
@@ -3816,36 +3848,48 @@ arm_function_value(const_tree type, const_tree func,
return arm_libcall_value_1 (mode);
}
-static int
-libcall_eq (const void *p1, const void *p2)
+/* libcall hashtable helpers. */
+
+struct libcall_hasher : typed_noop_remove <rtx_def>
+{
+ typedef rtx_def value_type;
+ typedef rtx_def compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+ static inline void remove (value_type *);
+};
+
+inline bool
+libcall_hasher::equal (const value_type *p1, const compare_type *p2)
{
- return rtx_equal_p ((const_rtx) p1, (const_rtx) p2);
+ return rtx_equal_p (p1, p2);
}
-static hashval_t
-libcall_hash (const void *p1)
+inline hashval_t
+libcall_hasher::hash (const value_type *p1)
{
- return hash_rtx ((const_rtx) p1, VOIDmode, NULL, NULL, FALSE);
+ return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
}
+typedef hash_table <libcall_hasher> libcall_table_type;
+
static void
-add_libcall (htab_t htab, rtx libcall)
+add_libcall (libcall_table_type htab, rtx libcall)
{
- *htab_find_slot (htab, libcall, INSERT) = libcall;
+ *htab.find_slot (libcall, INSERT) = libcall;
}
static bool
arm_libcall_uses_aapcs_base (const_rtx libcall)
{
static bool init_done = false;
- static htab_t libcall_htab;
+ static libcall_table_type libcall_htab;
if (!init_done)
{
init_done = true;
- libcall_htab = htab_create (31, libcall_hash, libcall_eq,
- NULL);
+ libcall_htab.create (31);
add_libcall (libcall_htab,
convert_optab_libfunc (sfloat_optab, SFmode, SImode));
add_libcall (libcall_htab,
@@ -3904,7 +3948,7 @@ arm_libcall_uses_aapcs_base (const_rtx libcall)
DFmode));
}
- return libcall && htab_find (libcall_htab, libcall) != NULL;
+ return libcall && libcall_htab.find (libcall) != NULL;
}
static rtx
@@ -7819,7 +7863,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
&& GET_CODE (SET_SRC (x)) == VEC_SELECT)
{
*total = rtx_cost (SET_DEST (x), code, 0, speed);
- if (!neon_vector_mem_operand (SET_DEST (x), 2))
+ if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
*total += COSTS_N_INSNS (1);
return true;
}
@@ -7830,7 +7874,7 @@ arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
{
rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
*total = rtx_cost (mem, code, 0, speed);
- if (!neon_vector_mem_operand (mem, 2))
+ if (!neon_vector_mem_operand (mem, 2, true))
*total += COSTS_N_INSNS (1);
return true;
}
@@ -9101,6 +9145,12 @@ arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
return cost;
}
+int
+arm_max_conditional_execute (void)
+{
+ return max_insns_skipped;
+}
+
static int
arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
{
@@ -10002,7 +10052,7 @@ arm_coproc_mem_operand (rtx op, bool wb)
2 - Element/structure loads (vld1)
*/
int
-neon_vector_mem_operand (rtx op, int type)
+neon_vector_mem_operand (rtx op, int type, bool strict)
{
rtx ind;
@@ -10014,7 +10064,7 @@ neon_vector_mem_operand (rtx op, int type)
|| reg_mentioned_p (virtual_outgoing_args_rtx, op)
|| reg_mentioned_p (virtual_stack_dynamic_rtx, op)
|| reg_mentioned_p (virtual_stack_vars_rtx, op)))
- return FALSE;
+ return !strict;
/* Constants are converted into offsets from labels. */
if (!MEM_P (op))
@@ -10124,7 +10174,7 @@ coproc_secondary_reload_class (enum machine_mode mode, rtx x, bool wb)
{
if (!TARGET_NEON_FP16)
return GENERAL_REGS;
- if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2))
+ if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
return NO_REGS;
return GENERAL_REGS;
}
@@ -16135,25 +16185,34 @@ arm_compute_save_reg0_reg12_mask (void)
return save_reg_mask;
}
+/* Return true if r3 is live at the start of the function. */
+
+static bool
+arm_r3_live_at_start_p (void)
+{
+ /* Just look at cfg info, which is still close enough to correct at this
+ point. This gives false positives for broken functions that might use
+ uninitialized data that happens to be allocated in r3, but who cares? */
+ return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 3);
+}
/* Compute the number of bytes used to store the static chain register on the
- stack, above the stack frame. We need to know this accurately to get the
- alignment of the rest of the stack frame correct. */
+ stack, above the stack frame. We need to know this accurately to get the
+ alignment of the rest of the stack frame correct. */
-static int arm_compute_static_chain_stack_bytes (void)
+static int
+arm_compute_static_chain_stack_bytes (void)
{
- unsigned long func_type = arm_current_func_type ();
- int static_chain_stack_bytes = 0;
-
- if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM &&
- IS_NESTED (func_type) &&
- df_regs_ever_live_p (3) && crtl->args.pretend_args_size == 0)
- static_chain_stack_bytes = 4;
+ /* See the defining assertion in arm_expand_prologue. */
+ if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
+ && IS_NESTED (arm_current_func_type ())
+ && arm_r3_live_at_start_p ()
+ && crtl->args.pretend_args_size == 0)
+ return 4;
- return static_chain_stack_bytes;
+ return 0;
}
-
/* Compute a bit mask of which registers need to be
saved on the stack for the current function.
This is used by arm_get_frame_offsets, which may add extra registers. */
@@ -17122,6 +17181,19 @@ emit_multi_reg_push (unsigned long mask)
return par;
}
+/* Add a REG_CFA_ADJUST_CFA REG note to INSN.
+ SIZE is the offset to be adjusted.
+ DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
+static void
+arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
+{
+ rtx dwarf;
+
+ RTX_FRAME_RELATED_P (insn) = 1;
+ dwarf = gen_rtx_SET (VOIDmode, dest, plus_constant (Pmode, src, size));
+ add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
+}
+
/* Generate and emit an insn pattern that we will recognize as a pop_multi.
SAVED_REGS_MASK shows which registers need to be restored.
@@ -17212,6 +17284,9 @@ arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
par = emit_insn (par);
REG_NOTES (par) = dwarf;
+ if (!return_in_pc)
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
+ stack_pointer_rtx, stack_pointer_rtx);
}
/* Generate and emit an insn pattern that we will recognize as a pop_multi
@@ -17282,6 +17357,9 @@ arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
par = emit_insn (par);
REG_NOTES (par) = dwarf;
+
+ arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
+ base_reg, base_reg);
}
/* Generate and emit a pattern that will be recognized as LDRD pattern. If even
@@ -17357,6 +17435,7 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
pattern can be emitted now. */
par = emit_insn (par);
REG_NOTES (par) = dwarf;
+ RTX_FRAME_RELATED_P (par) = 1;
}
i++;
@@ -17373,7 +17452,12 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
stack_pointer_rtx,
plus_constant (Pmode, stack_pointer_rtx, 4 * i));
RTX_FRAME_RELATED_P (tmp) = 1;
- emit_insn (tmp);
+ tmp = emit_insn (tmp);
+ if (!return_in_pc)
+ {
+ arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
+ stack_pointer_rtx, stack_pointer_rtx);
+ }
dwarf = NULL_RTX;
@@ -17407,9 +17491,11 @@ thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
else
{
par = emit_insn (tmp);
+ REG_NOTES (par) = dwarf;
+ arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
+ stack_pointer_rtx, stack_pointer_rtx);
}
- REG_NOTES (par) = dwarf;
}
else if ((num_regs % 2) == 1 && return_in_pc)
{
@@ -17568,11 +17654,27 @@ thumb_force_lr_save (void)
|| df_regs_ever_live_p (LR_REGNUM));
}
+/* We do not know whether r3 will be available, because an
+   indirect tailcall may be happening in this
+   particular case.  */
+static bool
+is_indirect_tailcall_p (rtx call)
+{
+ rtx pat = PATTERN (call);
+
+ /* Indirect tail call. */
+ pat = XVECEXP (pat, 0, 0);
+ if (GET_CODE (pat) == SET)
+ pat = SET_SRC (pat);
+
+ pat = XEXP (XEXP (pat, 0), 0);
+ return REG_P (pat);
+}
/* Return true if r3 is used by any of the tail call insns in the
current function. */
static bool
-any_sibcall_uses_r3 (void)
+any_sibcall_could_use_r3 (void)
{
edge_iterator ei;
edge e;
@@ -17586,7 +17688,8 @@ any_sibcall_uses_r3 (void)
if (!CALL_P (call))
call = prev_nonnote_nondebug_insn (call);
gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
- if (find_regno_fusage (call, USE, 3))
+ if (find_regno_fusage (call, USE, 3)
+ || is_indirect_tailcall_p (call))
return true;
}
return false;
@@ -17753,7 +17856,7 @@ arm_get_frame_offsets (void)
/* If it is safe to use r3, then do so. This sometimes
generates better code on Thumb-2 by avoiding the need to
use 32-bit push/pop instructions. */
- if (! any_sibcall_uses_r3 ()
+ if (! any_sibcall_could_use_r3 ()
&& arm_size_return_regs () <= 12
&& (offsets->saved_regs_mask & (1 << 3)) == 0
&& (TARGET_THUMB2 || !current_tune->prefer_ldrd_strd))
@@ -18064,16 +18167,16 @@ arm_expand_prologue (void)
}
else if (IS_NESTED (func_type))
{
- /* The Static chain register is the same as the IP register
+ /* The static chain register is the same as the IP register
used as a scratch register during stack frame creation.
To get around this need to find somewhere to store IP
whilst the frame is being created. We try the following
places in order:
- 1. The last argument register.
+ 1. The last argument register r3.
2. A slot on the stack above the frame. (This only
works if the function is not a varargs function).
- 3. Register r3, after pushing the argument registers
+ 3. Register r3 again, after pushing the argument registers
onto the stack.
Note - we only need to tell the dwarf2 backend about the SP
@@ -18081,7 +18184,7 @@ arm_expand_prologue (void)
doesn't need to be unwound, as it doesn't contain a value
inherited from the caller. */
- if (df_regs_ever_live_p (3) == false)
+ if (!arm_r3_live_at_start_p ())
insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
else if (args_to_push == 0)
{
@@ -18222,8 +18325,7 @@ arm_expand_prologue (void)
if (IS_NESTED (func_type))
{
/* Recover the static chain register. */
- if (!df_regs_ever_live_p (3)
- || saved_pretend_args)
+ if (!arm_r3_live_at_start_p () || saved_pretend_args)
insn = gen_rtx_REG (SImode, 3);
else /* if (crtl->args.pretend_args_size == 0) */
{
@@ -19471,6 +19573,13 @@ thumb2_final_prescan_insn (rtx insn)
enum arm_cond_code code;
int n;
int mask;
+ int max;
+
+  /* The maximum number of conditionally executed instructions in a block
+     is the minimum of two values: the maximum allowed in an IT block
+     and the maximum that is beneficial according to the cost model and tune.  */
+ max = (max_insns_skipped < MAX_INSN_PER_IT_BLOCK) ?
+ max_insns_skipped : MAX_INSN_PER_IT_BLOCK;
/* Remove the previous insn from the count of insns to be output. */
if (arm_condexec_count)
@@ -19513,9 +19622,9 @@ thumb2_final_prescan_insn (rtx insn)
/* ??? Recognize conditional jumps, and combine them with IT blocks. */
if (GET_CODE (body) != COND_EXEC)
break;
- /* Allow up to 4 conditionally executed instructions in a block. */
+ /* Maximum number of conditionally executed instructions in a block. */
n = get_attr_ce_count (insn);
- if (arm_condexec_masklen + n > 4)
+ if (arm_condexec_masklen + n > max)
break;
predicate = COND_EXEC_TEST (body);
@@ -23978,7 +24087,7 @@ thumb1_expand_prologue (void)
all we really need to check here is if single register is to be
returned, or multiple register return. */
void
-thumb2_expand_return (void)
+thumb2_expand_return (bool simple_return)
{
int i, num_regs;
unsigned long saved_regs_mask;
@@ -23991,7 +24100,7 @@ thumb2_expand_return (void)
if (saved_regs_mask & (1 << i))
num_regs++;
- if (saved_regs_mask)
+ if (!simple_return && saved_regs_mask)
{
if (num_regs == 1)
{
@@ -24269,6 +24378,7 @@ arm_expand_epilogue (bool really_return)
if (frame_pointer_needed)
{
+ rtx insn;
/* Restore stack pointer if necessary. */
if (TARGET_ARM)
{
@@ -24279,9 +24389,12 @@ arm_expand_epilogue (bool really_return)
/* Force out any pending memory operations that reference stacked data
before stack de-allocation occurs. */
emit_insn (gen_blockage ());
- emit_insn (gen_addsi3 (stack_pointer_rtx,
- hard_frame_pointer_rtx,
- GEN_INT (amount)));
+ insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (amount)));
+ arm_add_cfa_adjust_cfa_note (insn, amount,
+ stack_pointer_rtx,
+ hard_frame_pointer_rtx);
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
deleted. */
@@ -24291,16 +24404,25 @@ arm_expand_epilogue (bool really_return)
{
/* In Thumb-2 mode, the frame pointer points to the last saved
register. */
- amount = offsets->locals_base - offsets->saved_regs;
- if (amount)
- emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
- hard_frame_pointer_rtx,
- GEN_INT (amount)));
+ amount = offsets->locals_base - offsets->saved_regs;
+ if (amount)
+ {
+ insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx,
+ GEN_INT (amount)));
+ arm_add_cfa_adjust_cfa_note (insn, amount,
+ hard_frame_pointer_rtx,
+ hard_frame_pointer_rtx);
+ }
/* Force out any pending memory operations that reference stacked data
before stack de-allocation occurs. */
emit_insn (gen_blockage ());
- emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
+ insn = emit_insn (gen_movsi (stack_pointer_rtx,
+ hard_frame_pointer_rtx));
+ arm_add_cfa_adjust_cfa_note (insn, 0,
+ stack_pointer_rtx,
+ hard_frame_pointer_rtx);
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
deleted. */
emit_insn (gen_force_register_use (stack_pointer_rtx));
@@ -24313,12 +24435,15 @@ arm_expand_epilogue (bool really_return)
amount = offsets->outgoing_args - offsets->saved_regs;
if (amount)
{
+ rtx tmp;
/* Force out any pending memory operations that reference stacked data
before stack de-allocation occurs. */
emit_insn (gen_blockage ());
- emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (amount)));
+ tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (amount)));
+ arm_add_cfa_adjust_cfa_note (tmp, amount,
+ stack_pointer_rtx, stack_pointer_rtx);
/* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
not deleted. */
emit_insn (gen_force_register_use (stack_pointer_rtx));
@@ -24371,6 +24496,8 @@ arm_expand_epilogue (bool really_return)
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (V2SImode, i),
NULL_RTX);
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
+ stack_pointer_rtx, stack_pointer_rtx);
}
if (saved_regs_mask)
@@ -24418,6 +24545,9 @@ arm_expand_epilogue (bool really_return)
REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
gen_rtx_REG (SImode, i),
NULL_RTX);
+ arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
+ stack_pointer_rtx,
+ stack_pointer_rtx);
}
}
}
@@ -24442,9 +24572,33 @@ arm_expand_epilogue (bool really_return)
}
if (crtl->args.pretend_args_size)
- emit_insn (gen_addsi3 (stack_pointer_rtx,
- stack_pointer_rtx,
- GEN_INT (crtl->args.pretend_args_size)));
+ {
+ int i, j;
+ rtx dwarf = NULL_RTX;
+ rtx tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
+ stack_pointer_rtx,
+ GEN_INT (crtl->args.pretend_args_size)));
+
+ RTX_FRAME_RELATED_P (tmp) = 1;
+
+ if (cfun->machine->uses_anonymous_args)
+ {
+	  /* Restore pretend args.  Refer to arm_expand_prologue for how
+	     pretend_args are saved on the stack.  */
+ int num_regs = crtl->args.pretend_args_size / 4;
+ saved_regs_mask = (0xf0 >> num_regs) & 0xf;
+ for (j = 0, i = 0; j < num_regs; i++)
+ if (saved_regs_mask & (1 << i))
+ {
+ rtx reg = gen_rtx_REG (SImode, i);
+ dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
+ j++;
+ }
+ REG_NOTES (tmp) = dwarf;
+ }
+ arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
+ stack_pointer_rtx, stack_pointer_rtx);
+ }
if (!really_return)
return;
@@ -25861,9 +26015,8 @@ arm_dwarf_register_span (rtx rtl)
nregs = GET_MODE_SIZE (GET_MODE (rtl)) / 8;
p = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nregs));
- regno = (regno - FIRST_VFP_REGNUM) / 2;
for (i = 0; i < nregs; i++)
- XVECEXP (p, 0, i) = gen_rtx_REG (DImode, 256 + regno + i);
+ XVECEXP (p, 0, i) = gen_rtx_REG (DImode, regno + i);
return p;
}
@@ -26113,9 +26266,17 @@ arm_unwind_emit (FILE * asm_out_file, rtx insn)
handled_one = true;
break;
+    /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
+       to get correct dwarf information for shrink-wrapping.  We should not
+       emit unwind information for it because these notes are used either for
+       pretend arguments or to adjust sp and restore registers from the
+       stack.  */
+ case REG_CFA_ADJUST_CFA:
+ case REG_CFA_RESTORE:
+ return;
+
case REG_CFA_DEF_CFA:
case REG_CFA_EXPRESSION:
- case REG_CFA_ADJUST_CFA:
case REG_CFA_OFFSET:
/* ??? Only handling here what we actually emit. */
gcc_unreachable ();
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index 3a49a90c184..387d2717431 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -183,6 +183,11 @@ extern arm_cc arm_current_cc;
#define ARM_INVERSE_CONDITION_CODE(X) ((arm_cc) (((int)X) ^ 1))
+/* The maximum number of instructions that it is beneficial to
+   conditionally execute. */
+#undef MAX_CONDITIONAL_EXECUTE
+#define MAX_CONDITIONAL_EXECUTE arm_max_conditional_execute ()
+
extern int arm_target_label;
extern int arm_ccfsm_state;
extern GTY(()) rtx arm_target_insn;
diff --git a/gcc/config/arm/arm.md b/gcc/config/arm/arm.md
index 4b45c984bf4..c464eddebd4 100644
--- a/gcc/config/arm/arm.md
+++ b/gcc/config/arm/arm.md
@@ -93,6 +93,15 @@
; IS_THUMB1 is set to 'yes' iff we are generating Thumb-1 code.
(define_attr "is_thumb1" "no,yes" (const (symbol_ref "thumb1_code")))
+; We use this attribute to disable alternatives that can produce 32-bit
+; instructions inside an IT-block in Thumb2 state. ARMv8 deprecates IT blocks
+; that contain 32-bit instructions.
+(define_attr "enabled_for_depr_it" "no,yes" (const_string "yes"))
+
+; This attribute is used to disable a predicated alternative when we have
+; arm_restrict_it.
+(define_attr "predicable_short_it" "no,yes" (const_string "yes"))
+
;; Operand number of an input operand that is shifted. Zero if the
;; given instruction does not shift one of its input operands.
(define_attr "shift" "" (const_int 0))
@@ -103,6 +112,8 @@
(define_attr "fpu" "none,vfp"
(const (symbol_ref "arm_fpu_attr")))
+(define_attr "predicated" "yes,no" (const_string "no"))
+
; LENGTH of an instruction (in bytes)
(define_attr "length" ""
(const_int 4))
@@ -190,6 +201,15 @@
(cond [(eq_attr "insn_enabled" "no")
(const_string "no")
+ (and (eq_attr "predicable_short_it" "no")
+ (and (eq_attr "predicated" "yes")
+ (match_test "arm_restrict_it")))
+ (const_string "no")
+
+ (and (eq_attr "enabled_for_depr_it" "no")
+ (match_test "arm_restrict_it"))
+ (const_string "no")
+
(eq_attr "arch_enabled" "no")
(const_string "no")
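
As a rough illustration of how the new attributes interact: the "enabled" cond above rejects an alternative when it is predicated but has no 16-bit encoding, or when it is marked as unsuitable for deprecated IT blocks, whenever arm_restrict_it is in force. The following plain-C sketch only mirrors that cond logic; the struct and the helper alternative_enabled_p are hypothetical and are not GCC code.

#include <stdbool.h>

struct alt_attrs
{
  bool insn_enabled;         /* "insn_enabled" attribute.  */
  bool arch_enabled;         /* "arch_enabled" attribute.  */
  bool predicated;           /* Insn will sit inside an IT block.  */
  bool predicable_short_it;  /* Alternative has a 16-bit predicated form.  */
  bool enabled_for_depr_it;  /* Alternative allowed under -mrestrict-it.  */
};

static bool
alternative_enabled_p (const struct alt_attrs *a, bool arm_restrict_it)
{
  if (!a->insn_enabled)
    return false;
  /* Reject a predicated alternative with no 16-bit encoding when IT
     blocks are restricted (ARMv8 deprecates 32-bit insns in IT blocks).  */
  if (!a->predicable_short_it && a->predicated && arm_restrict_it)
    return false;
  /* Reject alternatives explicitly marked as unsuitable for
     deprecated IT blocks.  */
  if (!a->enabled_for_depr_it && arm_restrict_it)
    return false;
  return a->arch_enabled;
}
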
@@ -230,53 +250,91 @@
;; scheduling information.
(define_attr "insn"
- "mov,mvn,smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,umaal,smlald,smlsld,clz,mrs,msr,xtab,sdiv,udiv,sat,other"
+ "mov,mvn,clz,mrs,msr,xtab,sat,other"
(const_string "other"))
-; TYPE attribute is used to detect floating point instructions which, if
-; running on a co-processor can run in parallel with other, basic instructions
-; If write-buffer scheduling is enabled then it can also be used in the
-; scheduling of writes.
-
-; Classification of each insn
-; Note: vfp.md has different meanings for some of these, and some further
-; types as well. See that file for details.
-; simple_alu_imm a simple alu instruction that doesn't hit memory or fp
-; regs or have a shifted source operand and has an immediate
-; operand. This currently only tracks very basic immediate
-; alu operations.
-; alu_reg any alu instruction that doesn't hit memory or fp
-; regs or have a shifted source operand
-; and does not have an immediate operand. This is
-; also the default
-; simple_alu_shift covers UXTH, UXTB, SXTH, SXTB
-; alu_shift any data instruction that doesn't hit memory or fp
-; regs, but has a source operand shifted by a constant
-; alu_shift_reg any data instruction that doesn't hit memory or fp
-; regs, but has a source operand shifted by a register value
-; mult a multiply instruction
-; block blockage insn, this blocks all functional units
-; float a floating point arithmetic operation (subject to expansion)
-; fdivd DFmode floating point division
-; fdivs SFmode floating point division
-; f_load[sd] A single/double load from memory. Used for VFP unit.
-; f_store[sd] A single/double store to memory. Used for VFP unit.
-; f_flag a transfer of co-processor flags to the CPSR
-; f_2_r transfer float to core (no memory needed)
-; r_2_f transfer core to float
-; f_cvt convert floating<->integral
-; branch a branch
-; call a subroutine call
-; load_byte load byte(s) from memory to arm registers
-; load1 load 1 word from memory to arm registers
-; load2 load 2 words from memory to arm registers
-; load3 load 3 words from memory to arm registers
-; load4 load 4 words from memory to arm registers
-; store store 1 word to memory from arm registers
-; store2 store 2 words
-; store3 store 3 words
-; store4 store 4 (or more) words
+; TYPE attribute is used to classify instructions for use in scheduling.
;
+; Instruction classification:
+;
+; alu_reg any alu instruction that doesn't hit memory or fp
+; regs or have a shifted source operand and does not have
+; an immediate operand. This is also the default.
+; alu_shift any data instruction that doesn't hit memory or fp
+; regs, but has a source operand shifted by a constant.
+; alu_shift_reg any data instruction that doesn't hit memory or fp
+; regs, but has a source operand shifted by a register value.
+; block blockage insn, this blocks all functional units.
+; branch branch.
+; call subroutine call.
+; f_2_r transfer from float to core (no memory needed).
+; f_cvt conversion between float and integral.
+; f_flag transfer of co-processor flags to the CPSR.
+; f_load[d,s] double/single load from memory. Used for VFP unit.
+; f_minmax[d,s] double/single floating point minimum/maximum.
+; f_rint[d,s] double/single floating point round to integral.
+; f_sel[d,s] double/single floating point byte select.
+; f_store[d,s] double/single store to memory. Used for VFP unit.
+; fadd[d,s] double/single floating-point scalar addition.
+; fcmp[d,s] double/single floating-point compare.
+; fconst[d,s] double/single load immediate.
+; fcpys single precision floating point cpy.
+; fdiv[d,s] double/single precision floating point division.
+; ffarith[d,s] double/single floating point abs/neg/cpy.
+; ffma[d,s] double/single floating point fused multiply-accumulate.
+; float floating point arithmetic operation.
+; fmac[d,s] double/single floating point multiply-accumulate.
+; fmul[d,s] double/single floating point multiply.
+; load_byte load byte(s) from memory to arm registers.
+; load1 load 1 word from memory to arm registers.
+; load2 load 2 words from memory to arm registers.
+; load3 load 3 words from memory to arm registers.
+; load4 load 4 words from memory to arm registers.
+; mla integer multiply accumulate.
+; mlas integer multiply accumulate, flag setting.
+; mov integer move.
+; mul integer multiply.
+; muls integer multiply, flag setting.
+; r_2_f transfer from core to float.
+; sdiv signed division.
+; simple_alu_imm simple alu instruction that doesn't hit memory or fp
+; regs or have a shifted source operand and has an
+; immediate operand. This currently only tracks very basic
+; immediate alu operations.
+; simple_alu_shift simple alu instruction with a shifted source operand.
+; smlad signed multiply accumulate dual.
+; smladx signed multiply accumulate dual reverse.
+; smlal signed multiply accumulate long.
+; smlald signed multiply accumulate long dual.
+; smlals signed multiply accumulate long, flag setting.
+; smlalxy signed multiply accumulate, 16x16-bit, 64-bit accumulate.
+; smlawx signed multiply accumulate, 32x16-bit, 32-bit accumulate.
+; smlawy signed multiply accumulate wide, 32x16-bit,
+; 32-bit accumulate.
+; smlaxy signed multiply accumulate, 16x16-bit, 32-bit accumulate.
+; smlsd signed multiply subtract dual.
+; smlsdx signed multiply subtract dual reverse.
+; smlsld signed multiply subtract long dual.
+; smmla signed most significant word multiply accumulate.
+; smmul signed most significant word multiply.
+; smmulr signed most significant word multiply, rounded.
+; smuad signed dual multiply add.
+; smuadx signed dual multiply add reverse.
+; smull signed multiply long.
+; smulls signed multiply long, flag setting.
+; smulwy signed multiply wide, 32x16-bit, 32-bit accumulate.
+; smulxy signed multiply, 16x16-bit, 32-bit accumulate.
+; smusd signed dual multiply subtract.
+; smusdx signed dual multiply subtract reverse.
+; store1 store 1 word to memory from arm registers.
+; store2 store 2 words to memory from arm registers.
+; store3 store 3 words to memory from arm registers.
+; store4 store 4 (or more) words to memory from arm registers.
+; udiv unsigned division.
+; umaal unsigned multiply accumulate accumulate long.
+; umlal unsigned multiply accumulate long.
+; umlals unsigned multiply accumulate long, flag setting.
+; umull unsigned multiply long.
+; umulls unsigned multiply long, flag setting.
(define_attr "type"
"simple_alu_imm,\
@@ -284,7 +342,6 @@
simple_alu_shift,\
alu_shift,\
alu_shift_reg,\
- mult,\
block,\
float,\
fdivd,\
@@ -328,18 +385,57 @@
ffarithd,\
fcmps,\
fcmpd,\
- fcpys"
- (if_then_else
- (eq_attr "insn" "smulxy,smlaxy,smlalxy,smulwy,smlawx,mul,muls,mla,mlas,\
- umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
- (const_string "mult")
- (const_string "alu_reg")))
+ fcpys,\
+ smulxy,\
+ smlaxy,\
+ smlalxy,\
+ smulwy,\
+ smlawx,\
+ mul,\
+ muls,\
+ mla,\
+ mlas,\
+ umull,\
+ umulls,\
+ umlal,\
+ umlals,\
+ smull,\
+ smulls,\
+ smlal,\
+ smlals,\
+ smlawy,\
+ smuad,\
+ smuadx,\
+ smlad,\
+ smladx,\
+ smusd,\
+ smusdx,\
+ smlsd,\
+ smlsdx,\
+ smmul,\
+ smmulr,\
+ smmla,\
+ umaal,\
+ smlald,\
+ smlsld,\
+ sdiv,\
+ udiv"
+ (const_string "alu_reg"))
+
+; Is this an (integer side) multiply with a 32-bit (or smaller) result?
+(define_attr "mul32" "no,yes"
+ (if_then_else
+ (eq_attr "type"
+ "smulxy,smlaxy,smulwy,smlawx,mul,muls,mla,mlas,smlawy,smuad,smuadx,\
+ smlad,smladx,smusd,smusdx,smlsd,smlsdx,smmul,smmulr,smmla,smlald,smlsld")
+ (const_string "yes")
+ (const_string "no")))
; Is this an (integer side) multiply with a 64-bit result?
(define_attr "mul64" "no,yes"
(if_then_else
- (eq_attr "insn"
- "smlalxy,umull,umulls,umlal,umlals,smull,smulls,smlal,smlals")
+ (eq_attr "type"
+ "smlalxy,umull,umulls,umaal,umlal,umlals,smull,smulls,smlal,smlals")
(const_string "yes")
(const_string "no")))
@@ -1464,18 +1560,21 @@
(match_operand:SI 1 "s_register_operand" "%0,r")))]
"TARGET_32BIT && !arm_arch6"
"mul%?\\t%0, %2, %1"
- [(set_attr "insn" "mul")
+ [(set_attr "type" "mul")
(set_attr "predicable" "yes")]
)
(define_insn "*arm_mulsi3_v6"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (mult:SI (match_operand:SI 1 "s_register_operand" "r")
- (match_operand:SI 2 "s_register_operand" "r")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r")
+ (mult:SI (match_operand:SI 1 "s_register_operand" "0,l,r")
+ (match_operand:SI 2 "s_register_operand" "l,0,r")))]
"TARGET_32BIT && arm_arch6"
"mul%?\\t%0, %1, %2"
- [(set_attr "insn" "mul")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "mul")
+ (set_attr "predicable" "yes")
+ (set_attr "arch" "t2,t2,*")
+ (set_attr "length" "4")
+ (set_attr "predicable_short_it" "yes,yes,no")]
)
; Unfortunately with the Thumb the '&'/'0' trick can fail when operands
@@ -1495,7 +1594,7 @@
return \"mul\\t%0, %2\";
"
[(set_attr "length" "4,4,2")
- (set_attr "insn" "mul")]
+ (set_attr "type" "muls")]
)
(define_insn "*thumb_mulsi3_v6"
@@ -1508,7 +1607,7 @@
mul\\t%0, %1
mul\\t%0, %1"
[(set_attr "length" "2")
- (set_attr "insn" "mul")]
+ (set_attr "type" "muls")]
)
(define_insn "*mulsi3_compare0"
@@ -1522,7 +1621,7 @@
"TARGET_ARM && !arm_arch6"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
- (set_attr "insn" "muls")]
+ (set_attr "type" "muls")]
)
(define_insn "*mulsi3_compare0_v6"
@@ -1536,7 +1635,7 @@
"TARGET_ARM && arm_arch6 && optimize_size"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
- (set_attr "insn" "muls")]
+ (set_attr "type" "muls")]
)
(define_insn "*mulsi_compare0_scratch"
@@ -1549,7 +1648,7 @@
"TARGET_ARM && !arm_arch6"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
- (set_attr "insn" "muls")]
+ (set_attr "type" "muls")]
)
(define_insn "*mulsi_compare0_scratch_v6"
@@ -1562,7 +1661,7 @@
"TARGET_ARM && arm_arch6 && optimize_size"
"mul%.\\t%0, %2, %1"
[(set_attr "conds" "set")
- (set_attr "insn" "muls")]
+ (set_attr "type" "muls")]
)
;; Unnamed templates to match MLA instruction.
@@ -1575,7 +1674,7 @@
(match_operand:SI 3 "s_register_operand" "r,r,0,0")))]
"TARGET_32BIT && !arm_arch6"
"mla%?\\t%0, %2, %1, %3"
- [(set_attr "insn" "mla")
+ [(set_attr "type" "mla")
(set_attr "predicable" "yes")]
)
@@ -1587,8 +1686,9 @@
(match_operand:SI 3 "s_register_operand" "r")))]
"TARGET_32BIT && arm_arch6"
"mla%?\\t%0, %2, %1, %3"
- [(set_attr "insn" "mla")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "mla")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*mulsi3addsi_compare0"
@@ -1605,7 +1705,7 @@
"TARGET_ARM && arm_arch6"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
- (set_attr "insn" "mlas")]
+ (set_attr "type" "mlas")]
)
(define_insn "*mulsi3addsi_compare0_v6"
@@ -1622,7 +1722,7 @@
"TARGET_ARM && arm_arch6 && optimize_size"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
- (set_attr "insn" "mlas")]
+ (set_attr "type" "mlas")]
)
(define_insn "*mulsi3addsi_compare0_scratch"
@@ -1637,7 +1737,7 @@
"TARGET_ARM && !arm_arch6"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
- (set_attr "insn" "mlas")]
+ (set_attr "type" "mlas")]
)
(define_insn "*mulsi3addsi_compare0_scratch_v6"
@@ -1652,7 +1752,7 @@
"TARGET_ARM && arm_arch6 && optimize_size"
"mla%.\\t%0, %2, %1, %3"
[(set_attr "conds" "set")
- (set_attr "insn" "mlas")]
+ (set_attr "type" "mlas")]
)
(define_insn "*mulsi3subsi"
@@ -1663,8 +1763,9 @@
(match_operand:SI 1 "s_register_operand" "r"))))]
"TARGET_32BIT && arm_arch_thumb2"
"mls%?\\t%0, %2, %1, %3"
- [(set_attr "insn" "mla")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "mla")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "maddsidi4"
@@ -1686,7 +1787,7 @@
(match_operand:DI 1 "s_register_operand" "0")))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"smlal%?\\t%Q0, %R0, %3, %2"
- [(set_attr "insn" "smlal")
+ [(set_attr "type" "smlal")
(set_attr "predicable" "yes")]
)
@@ -1699,8 +1800,9 @@
(match_operand:DI 1 "s_register_operand" "0")))]
"TARGET_32BIT && arm_arch6"
"smlal%?\\t%Q0, %R0, %3, %2"
- [(set_attr "insn" "smlal")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smlal")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
;; 32x32->64 widening multiply.
@@ -1725,7 +1827,7 @@
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"smull%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "smull")
+ [(set_attr "type" "smull")
(set_attr "predicable" "yes")]
)
@@ -1736,8 +1838,9 @@
(sign_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
"TARGET_32BIT && arm_arch6"
"smull%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "smull")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smull")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "umulsidi3"
@@ -1756,7 +1859,7 @@
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"umull%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "umull")
+ [(set_attr "type" "umull")
(set_attr "predicable" "yes")]
)
@@ -1767,8 +1870,9 @@
(zero_extend:DI (match_operand:SI 2 "s_register_operand" "r"))))]
"TARGET_32BIT && arm_arch6"
"umull%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "umull")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "umull")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "umaddsidi4"
@@ -1790,7 +1894,7 @@
(match_operand:DI 1 "s_register_operand" "0")))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"umlal%?\\t%Q0, %R0, %3, %2"
- [(set_attr "insn" "umlal")
+ [(set_attr "type" "umlal")
(set_attr "predicable" "yes")]
)
@@ -1803,8 +1907,9 @@
(match_operand:DI 1 "s_register_operand" "0")))]
"TARGET_32BIT && arm_arch6"
"umlal%?\\t%Q0, %R0, %3, %2"
- [(set_attr "insn" "umlal")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "umlal")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "smulsi3_highpart"
@@ -1832,7 +1937,7 @@
(clobber (match_scratch:SI 3 "=&r,&r"))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"smull%?\\t%3, %0, %2, %1"
- [(set_attr "insn" "smull")
+ [(set_attr "type" "smull")
(set_attr "predicable" "yes")]
)
@@ -1847,8 +1952,9 @@
(clobber (match_scratch:SI 3 "=r"))]
"TARGET_32BIT && arm_arch6"
"smull%?\\t%3, %0, %2, %1"
- [(set_attr "insn" "smull")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smull")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "umulsi3_highpart"
@@ -1876,7 +1982,7 @@
(clobber (match_scratch:SI 3 "=&r,&r"))]
"TARGET_32BIT && arm_arch3m && !arm_arch6"
"umull%?\\t%3, %0, %2, %1"
- [(set_attr "insn" "umull")
+ [(set_attr "type" "umull")
(set_attr "predicable" "yes")]
)
@@ -1891,8 +1997,9 @@
(clobber (match_scratch:SI 3 "=r"))]
"TARGET_32BIT && arm_arch6"
"umull%?\\t%3, %0, %2, %1"
- [(set_attr "insn" "umull")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "umull")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "mulhisi3"
@@ -1903,7 +2010,7 @@
(match_operand:HI 2 "s_register_operand" "r"))))]
"TARGET_DSP_MULTIPLY"
"smulbb%?\\t%0, %1, %2"
- [(set_attr "insn" "smulxy")
+ [(set_attr "type" "smulxy")
(set_attr "predicable" "yes")]
)
@@ -1916,8 +2023,9 @@
(match_operand:HI 2 "s_register_operand" "r"))))]
"TARGET_DSP_MULTIPLY"
"smultb%?\\t%0, %1, %2"
- [(set_attr "insn" "smulxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smulxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*mulhisi3bt"
@@ -1929,8 +2037,9 @@
(const_int 16))))]
"TARGET_DSP_MULTIPLY"
"smulbt%?\\t%0, %1, %2"
- [(set_attr "insn" "smulxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smulxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*mulhisi3tt"
@@ -1943,8 +2052,9 @@
(const_int 16))))]
"TARGET_DSP_MULTIPLY"
"smultt%?\\t%0, %1, %2"
- [(set_attr "insn" "smulxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smulxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "maddhisi4"
@@ -1956,8 +2066,9 @@
(match_operand:SI 3 "s_register_operand" "r")))]
"TARGET_DSP_MULTIPLY"
"smlabb%?\\t%0, %1, %2, %3"
- [(set_attr "insn" "smlaxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smlaxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
;; Note: there is no maddhisi4ibt because this one is canonical form
@@ -1971,8 +2082,9 @@
(match_operand:SI 3 "s_register_operand" "r")))]
"TARGET_DSP_MULTIPLY"
"smlatb%?\\t%0, %1, %2, %3"
- [(set_attr "insn" "smlaxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smlaxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*maddhisi4tt"
@@ -1986,22 +2098,24 @@
(match_operand:SI 3 "s_register_operand" "r")))]
"TARGET_DSP_MULTIPLY"
"smlatt%?\\t%0, %1, %2, %3"
- [(set_attr "insn" "smlaxy")
- (set_attr "predicable" "yes")]
+ [(set_attr "type" "smlaxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "maddhidi4"
[(set (match_operand:DI 0 "s_register_operand" "=r")
(plus:DI
(mult:DI (sign_extend:DI
- (match_operand:HI 1 "s_register_operand" "r"))
+ (match_operand:HI 1 "s_register_operand" "r"))
(sign_extend:DI
(match_operand:HI 2 "s_register_operand" "r")))
(match_operand:DI 3 "s_register_operand" "0")))]
"TARGET_DSP_MULTIPLY"
"smlalbb%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "smlalxy")
- (set_attr "predicable" "yes")])
+ [(set_attr "type" "smlalxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
;; Note: there is no maddhidi4ibt because this one is canonical form
(define_insn "*maddhidi4tb"
@@ -2016,8 +2130,9 @@
(match_operand:DI 3 "s_register_operand" "0")))]
"TARGET_DSP_MULTIPLY"
"smlaltb%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "smlalxy")
- (set_attr "predicable" "yes")])
+ [(set_attr "type" "smlalxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*maddhidi4tt"
[(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -2033,8 +2148,9 @@
(match_operand:DI 3 "s_register_operand" "0")))]
"TARGET_DSP_MULTIPLY"
"smlaltt%?\\t%Q0, %R0, %1, %2"
- [(set_attr "insn" "smlalxy")
- (set_attr "predicable" "yes")])
+ [(set_attr "type" "smlalxy")
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_expand "mulsf3"
[(set (match_operand:SF 0 "s_register_operand" "")
@@ -2163,29 +2279,28 @@
)
(define_insn_and_split "*anddi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r,&r,&r,w,w ,?&r,?&r,?w,?w")
- (and:DI (match_operand:DI 1 "s_register_operand" "%0 ,r ,0,r ,w,0 ,0 ,r ,w ,0")
- (match_operand:DI 2 "arm_anddi_operand_neon" "r ,r ,De,De,w,DL,r ,r ,w ,DL")))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w")
+ (and:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0")
+ (match_operand:DI 2 "arm_anddi_operand_neon" "w ,DL,r ,r ,De,De,w ,DL")))]
"TARGET_32BIT && !TARGET_IWMMXT"
{
switch (which_alternative)
{
- case 0:
- case 1:
+ case 0: /* fall through */
+ case 6: return "vand\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 7: return neon_output_logic_immediate ("vand", &operands[2],
+ DImode, 1, VALID_NEON_QREG_MODE (DImode));
case 2:
- case 3: /* fall through */
- return "#";
- case 4: /* fall through */
- case 8: return "vand\t%P0, %P1, %P2";
+ case 3:
+ case 4:
case 5: /* fall through */
- case 9: return neon_output_logic_immediate ("vand", &operands[2],
- DImode, 1, VALID_NEON_QREG_MODE (DImode));
- case 6: return "#";
- case 7: return "#";
+ return "#";
default: gcc_unreachable ();
}
}
- "TARGET_32BIT && !TARGET_IWMMXT"
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
[(set (match_dup 3) (match_dup 4))
(set (match_dup 5) (match_dup 6))]
"
@@ -2201,19 +2316,11 @@
gen_highpart_mode (SImode, DImode, operands[2]));
}"
- [(set_attr "neon_type" "*,*,*,*,neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
- (set_attr "arch" "*,*,*,*,neon_for_64bits,neon_for_64bits,*,*,
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,
avoid_neon_for_64bits,avoid_neon_for_64bits")
- (set_attr "length" "8,8,8,8,*,*,8,8,*,*")
- (set (attr "insn_enabled") (if_then_else
- (lt (symbol_ref "which_alternative")
- (const_int 4))
- (if_then_else (match_test "!TARGET_NEON")
- (const_string "yes")
- (const_string "no"))
- (if_then_else (match_test "TARGET_NEON")
- (const_string "yes")
- (const_string "no"))))]
+ (set_attr "length" "*,*,8,8,8,8,*,*")
+ ]
)
(define_insn_and_split "*anddi_zesidi_di"
@@ -2399,7 +2506,7 @@
[(set (reg:CC_NOOV CC_REGNUM)
(compare:CC_NOOV (zero_extract:SI
(match_operand:SI 0 "s_register_operand" "r")
- (match_operand 1 "const_int_operand" "n")
+ (match_operand 1 "const_int_operand" "n")
(match_operand 2 "const_int_operand" "n"))
(const_int 0)))]
"TARGET_32BIT
@@ -2415,6 +2522,7 @@
"
[(set_attr "conds" "set")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "simple_alu_imm")]
)
@@ -2842,7 +2950,8 @@
"arm_arch_thumb2"
"bfc%?\t%0, %2, %1"
[(set_attr "length" "4")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "insv_t2"
@@ -2853,7 +2962,8 @@
"arm_arch_thumb2"
"bfi%?\t%0, %3, %2, %1"
[(set_attr "length" "4")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
; constants for op 2 will never be given to these patterns.
@@ -2880,7 +2990,7 @@
[(set_attr "length" "8")
(set_attr "predicable" "yes")]
)
-
+
(define_insn_and_split "*anddi_notzesidi_di"
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
(and:DI (not:DI (zero_extend:DI
@@ -2905,9 +3015,10 @@
operands[1] = gen_lowpart (SImode, operands[1]);
}"
[(set_attr "length" "4,8")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
-
+
(define_insn_and_split "*anddi_notsesidi_di"
[(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
(and:DI (not:DI (sign_extend:DI
@@ -2928,16 +3039,18 @@
operands[1] = gen_lowpart (SImode, operands[1]);
}"
[(set_attr "length" "8")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
-
+
(define_insn "andsi_notsi_si"
[(set (match_operand:SI 0 "s_register_operand" "=r")
(and:SI (not:SI (match_operand:SI 2 "s_register_operand" "r"))
(match_operand:SI 1 "s_register_operand" "r")))]
"TARGET_32BIT"
"bic%?\\t%0, %1, %2"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "thumb1_bicsi3"
@@ -2997,14 +3110,47 @@
""
)
-(define_insn "*iordi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
- (ior:DI (match_operand:DI 1 "s_register_operand" "%0,r")
- (match_operand:DI 2 "s_register_operand" "r,r")))]
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
- "#"
- [(set_attr "length" "8")
- (set_attr "predicable" "yes")]
+(define_insn_and_split "*iordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w ,&r,&r,&r,&r,?w,?w")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0 ,0 ,r ,0 ,r ,w ,0")
+ (match_operand:DI 2 "arm_iordi_operand_neon" "w ,Dl,r ,r ,Df,Df,w ,Dl")))]
+ "TARGET_32BIT && !TARGET_IWMMXT"
+ {
+ switch (which_alternative)
+ {
+ case 0: /* fall through */
+ case 6: return "vorr\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 7: return neon_output_logic_immediate ("vorr", &operands[2],
+ DImode, 0, VALID_NEON_QREG_MODE (DImode));
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ return "#";
+ default: gcc_unreachable ();
+ }
+ }
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+ "
+ {
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+
+ operands[4] = simplify_gen_binary (IOR, SImode,
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2]));
+ operands[6] = simplify_gen_binary (IOR, SImode,
+ gen_highpart (SImode, operands[1]),
+ gen_highpart_mode (SImode, DImode, operands[2]));
+
+ }"
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,*,*,neon_int_1,neon_int_1")
+ (set_attr "length" "*,*,8,8,8,8,*,*")
+ (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
)
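
For intuition, the split above (like the matching anddi3 and xordi3 splitters) lowers a 64-bit logical operation on core registers into two independent 32-bit operations, one per word. A minimal C sketch of the resulting data flow, assuming a plain word-by-word lowering (illustrative only, not GCC internals):

#include <stdint.h>

/* After reload the splitter rewrites the 64-bit IOR as two 32-bit ORRs,
   one on the low words and one on the high words.  */
static uint64_t
ior_di_via_si (uint64_t a, uint64_t b)
{
  uint32_t lo = (uint32_t) a | (uint32_t) b;                  /* orr rLO, aLO, bLO */
  uint32_t hi = (uint32_t) (a >> 32) | (uint32_t) (b >> 32);  /* orr rHI, aHI, bHI */
  return ((uint64_t) hi << 32) | lo;
}

The NEON alternatives keep the operation in a single 64-bit register and are left to the vorr/vand/veor forms instead of being split.
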
(define_insn "*iordi_zesidi_di"
@@ -3017,7 +3163,8 @@
orr%?\\t%Q0, %Q1, %2
#"
[(set_attr "length" "4,8")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*iordi_sesidi_di"
@@ -3137,19 +3284,49 @@
(define_expand "xordi3"
[(set (match_operand:DI 0 "s_register_operand" "")
(xor:DI (match_operand:DI 1 "s_register_operand" "")
- (match_operand:DI 2 "s_register_operand" "")))]
+ (match_operand:DI 2 "arm_xordi_operand" "")))]
"TARGET_32BIT"
""
)
-(define_insn "*xordi3_insn"
- [(set (match_operand:DI 0 "s_register_operand" "=&r,&r")
- (xor:DI (match_operand:DI 1 "s_register_operand" "%0,r")
- (match_operand:DI 2 "s_register_operand" "r,r")))]
- "TARGET_32BIT && !TARGET_IWMMXT && !TARGET_NEON"
- "#"
- [(set_attr "length" "8")
- (set_attr "predicable" "yes")]
+(define_insn_and_split "*xordi3_insn"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,&r,&r,&r,&r,?w")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "w ,%0,r ,0 ,r ,w")
+ (match_operand:DI 2 "arm_xordi_operand" "w ,r ,r ,Dg,Dg,w")))]
+ "TARGET_32BIT && !TARGET_IWMMXT"
+{
+ switch (which_alternative)
+ {
+ case 1:
+ case 2:
+ case 3:
+ case 4: /* fall through */
+ return "#";
+ case 0: /* fall through */
+ case 5: return "veor\t%P0, %P1, %P2";
+ default: gcc_unreachable ();
+ }
+}
+ "TARGET_32BIT && !TARGET_IWMMXT && reload_completed
+ && !(IS_VFP_REGNUM (REGNO (operands[0])))"
+ [(set (match_dup 3) (match_dup 4))
+ (set (match_dup 5) (match_dup 6))]
+ "
+ {
+ operands[3] = gen_lowpart (SImode, operands[0]);
+ operands[5] = gen_highpart (SImode, operands[0]);
+
+ operands[4] = simplify_gen_binary (XOR, SImode,
+ gen_lowpart (SImode, operands[1]),
+ gen_lowpart (SImode, operands[2]));
+ operands[6] = simplify_gen_binary (XOR, SImode,
+ gen_highpart (SImode, operands[1]),
+ gen_highpart_mode (SImode, DImode, operands[2]));
+
+ }"
+ [(set_attr "length" "*,8,8,8,8,*")
+ (set_attr "neon_type" "neon_int_1,*,*,*,*,neon_int_1")
+ (set_attr "arch" "neon_for_64bits,*,*,*,*,avoid_neon_for_64bits")]
)
(define_insn "*xordi_zesidi_di"
@@ -3162,7 +3339,8 @@
eor%?\\t%Q0, %Q1, %2
#"
[(set_attr "length" "4,8")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*xordi_sesidi_di"
@@ -3292,7 +3470,8 @@
""
[(set_attr "length" "8")
(set_attr "ce_count" "2")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
; ??? Are these four splitters still beneficial when the Thumb-2 bitfield
@@ -3428,7 +3607,8 @@
(const_int 0)))]
"TARGET_32BIT"
"bic%?\\t%0, %1, %1, asr #31"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "*smax_m1"
@@ -3437,7 +3617,8 @@
(const_int -1)))]
"TARGET_32BIT"
"orr%?\\t%0, %1, %1, asr #31"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn_and_split "*arm_smax_insn"
@@ -3485,7 +3666,8 @@
(const_int 0)))]
"TARGET_32BIT"
"and%?\\t%0, %1, %1, asr #31"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn_and_split "*arm_smin_insn"
@@ -4160,6 +4342,7 @@
"TARGET_32BIT"
"mvn%?\\t%0, %1%S3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "shift" "1")
(set_attr "insn" "mvn")
(set_attr "arch" "32,a")
@@ -4373,6 +4556,7 @@
[(set_attr "arch" "t2,any")
(set_attr "length" "2,4")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
(set_attr "type" "load1")])
(define_insn "unaligned_loadhis"
@@ -4385,6 +4569,7 @@
[(set_attr "arch" "t2,any")
(set_attr "length" "2,4")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
(set_attr "type" "load_byte")])
(define_insn "unaligned_loadhiu"
@@ -4397,6 +4582,7 @@
[(set_attr "arch" "t2,any")
(set_attr "length" "2,4")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
(set_attr "type" "load_byte")])
(define_insn "unaligned_storesi"
@@ -4408,6 +4594,7 @@
[(set_attr "arch" "t2,any")
(set_attr "length" "2,4")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
(set_attr "type" "store1")])
(define_insn "unaligned_storehi"
@@ -4419,6 +4606,7 @@
[(set_attr "arch" "t2,any")
(set_attr "length" "2,4")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
(set_attr "type" "store1")])
;; Unaligned double-word load and store.
@@ -4487,7 +4675,8 @@
"arm_arch_thumb2"
"sbfx%?\t%0, %1, %3, %2"
[(set_attr "length" "4")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_insn "extzv_t2"
@@ -4498,7 +4687,8 @@
"arm_arch_thumb2"
"ubfx%?\t%0, %1, %3, %2"
[(set_attr "length" "4")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
@@ -4510,7 +4700,8 @@
"TARGET_IDIV"
"sdiv%?\t%0, %1, %2"
[(set_attr "predicable" "yes")
- (set_attr "insn" "sdiv")]
+ (set_attr "predicable_short_it" "no")
+ (set_attr "type" "sdiv")]
)
(define_insn "udivsi3"
@@ -4520,7 +4711,8 @@
"TARGET_IDIV"
"udiv%?\t%0, %1, %2"
[(set_attr "predicable" "yes")
- (set_attr "insn" "udiv")]
+ (set_attr "predicable_short_it" "no")
+ (set_attr "type" "udiv")]
)
@@ -4582,11 +4774,14 @@
)
(define_insn "*arm_negsi2"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (neg:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (neg:SI (match_operand:SI 1 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"rsb%?\\t%0, %1, #0"
- [(set_attr "predicable" "yes")]
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
+ (set_attr "arch" "t2,*")
+ (set_attr "length" "4")]
)
(define_insn "*thumb1_negsi2"
@@ -4904,11 +5099,14 @@
)
(define_insn "*arm_one_cmplsi2"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
- (not:SI (match_operand:SI 1 "s_register_operand" "r")))]
+ [(set (match_operand:SI 0 "s_register_operand" "=l,r")
+ (not:SI (match_operand:SI 1 "s_register_operand" "l,r")))]
"TARGET_32BIT"
"mvn%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "yes,no")
+ (set_attr "arch" "t2,*")
+ (set_attr "length" "4")
(set_attr "insn" "mvn")]
)
@@ -5234,7 +5432,8 @@
"TARGET_INT_SIMD"
"uxtah%?\\t%0, %2, %1"
[(set_attr "type" "alu_shift")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "zero_extendqisi2"
@@ -5327,6 +5526,7 @@
"TARGET_INT_SIMD"
"uxtab%?\\t%0, %2, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "insn" "xtab")
(set_attr "type" "alu_shift")]
)
@@ -5379,7 +5579,8 @@
"TARGET_32BIT"
"tst%?\\t%0, #255"
[(set_attr "conds" "set")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_expand "extendhisi2"
@@ -5565,6 +5766,7 @@
ldr%(sh%)\\t%0, %1"
[(set_attr "type" "simple_alu_shift,load_byte")
(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "pool_range" "*,256")
(set_attr "neg_pool_range" "*,244")]
)
@@ -5677,7 +5879,8 @@
"sxtab%?\\t%0, %2, %1"
[(set_attr "type" "alu_shift")
(set_attr "insn" "xtab")
- (set_attr "predicable" "yes")]
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")]
)
(define_split
@@ -6133,6 +6336,7 @@
"arm_arch_thumb2"
"movt%?\t%0, #:upper16:%c2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "length" "4")]
)
@@ -7012,26 +7216,28 @@
"
)
-
(define_insn "*arm_movqi_insn"
- [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,Uu,r,m")
- (match_operand:QI 1 "general_operand" "r,I,K,Uu,l,m,r"))]
+ [(set (match_operand:QI 0 "nonimmediate_operand" "=r,r,r,l,r,l,Uu,r,m")
+ (match_operand:QI 1 "general_operand" "r,r,I,Py,K,Uu,l,m,r"))]
"TARGET_32BIT
&& ( register_operand (operands[0], QImode)
|| register_operand (operands[1], QImode))"
"@
mov%?\\t%0, %1
mov%?\\t%0, %1
+ mov%?\\t%0, %1
+ mov%?\\t%0, %1
mvn%?\\t%0, #%B1
ldr%(b%)\\t%0, %1
str%(b%)\\t%1, %0
ldr%(b%)\\t%0, %1
str%(b%)\\t%1, %0"
- [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1")
- (set_attr "insn" "mov,mov,mvn,*,*,*,*")
+ [(set_attr "type" "*,*,simple_alu_imm,simple_alu_imm,simple_alu_imm,load1, store1, load1, store1")
+ (set_attr "insn" "mov,mov,mov,mov,mvn,*,*,*,*")
(set_attr "predicable" "yes")
- (set_attr "arch" "any,any,any,t2,t2,any,any")
- (set_attr "length" "4,4,4,2,2,4,4")]
+ (set_attr "predicable_short_it" "yes,yes,yes,no,no,no,no,no,no")
+ (set_attr "arch" "t2,any,any,t2,any,t2,t2,any,any")
+ (set_attr "length" "2,4,4,2,4,2,2,4,4")]
)
(define_insn "*thumb1_movqi_insn"
@@ -8701,7 +8907,7 @@
(define_expand "movsfcc"
[(set (match_operand:SF 0 "s_register_operand" "")
- (if_then_else:SF (match_operand 1 "expandable_comparison_operator" "")
+ (if_then_else:SF (match_operand 1 "arm_cond_move_operator" "")
(match_operand:SF 2 "s_register_operand" "")
(match_operand:SF 3 "s_register_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT"
@@ -8723,7 +8929,7 @@
(define_expand "movdfcc"
[(set (match_operand:DF 0 "s_register_operand" "")
- (if_then_else:DF (match_operand 1 "expandable_comparison_operator" "")
+ (if_then_else:DF (match_operand 1 "arm_cond_move_operator" "")
(match_operand:DF 2 "s_register_operand" "")
(match_operand:DF 3 "s_register_operand" "")))]
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
@@ -9276,17 +9482,17 @@
[(set_attr "type" "call")]
)
-(define_expand "return"
- [(return)]
+(define_expand "<return_str>return"
+ [(returns)]
"(TARGET_ARM || (TARGET_THUMB2
&& ARM_FUNC_TYPE (arm_current_func_type ()) == ARM_FT_NORMAL
&& !IS_STACKALIGN (arm_current_func_type ())))
- && USE_RETURN_INSN (FALSE)"
+ <return_cond_false>"
"
{
if (TARGET_THUMB2)
{
- thumb2_expand_return ();
+ thumb2_expand_return (<return_simple_p>);
DONE;
}
}
@@ -9311,13 +9517,13 @@
(set_attr "predicable" "yes")]
)
-(define_insn "*cond_return"
+(define_insn "*cond_<return_str>return"
[(set (pc)
(if_then_else (match_operator 0 "arm_comparison_operator"
[(match_operand 1 "cc_register" "") (const_int 0)])
- (return)
+ (returns)
(pc)))]
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+ "TARGET_ARM <return_cond_true>"
"*
{
if (arm_ccfsm_state == 2)
@@ -9325,20 +9531,21 @@
arm_ccfsm_state += 2;
return \"\";
}
- return output_return_instruction (operands[0], true, false, false);
+ return output_return_instruction (operands[0], true, false,
+ <return_simple_p>);
}"
[(set_attr "conds" "use")
(set_attr "length" "12")
(set_attr "type" "load1")]
)
-(define_insn "*cond_return_inverted"
+(define_insn "*cond_<return_str>return_inverted"
[(set (pc)
(if_then_else (match_operator 0 "arm_comparison_operator"
[(match_operand 1 "cc_register" "") (const_int 0)])
(pc)
- (return)))]
- "TARGET_ARM && USE_RETURN_INSN (TRUE)"
+ (returns)))]
+ "TARGET_ARM <return_cond_true>"
"*
{
if (arm_ccfsm_state == 2)
@@ -9346,7 +9553,8 @@
arm_ccfsm_state += 2;
return \"\";
}
- return output_return_instruction (operands[0], true, true, false);
+ return output_return_instruction (operands[0], true, true,
+ <return_simple_p>);
}"
[(set_attr "conds" "use")
(set_attr "length" "12")
@@ -9908,6 +10116,16 @@
(eq:SI (match_operand:SI 1 "s_register_operand" "")
(const_int 0)))
(clobber (reg:CC CC_REGNUM))]
+ "arm_arch5 && TARGET_32BIT"
+ [(set (match_dup 0) (clz:SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))]
+)
+
+(define_split
+ [(set (match_operand:SI 0 "s_register_operand" "")
+ (eq:SI (match_operand:SI 1 "s_register_operand" "")
+ (const_int 0)))
+ (clobber (reg:CC CC_REGNUM))]
"TARGET_32BIT && reload_completed"
[(parallel
[(set (reg:CC CC_REGNUM)
@@ -9948,7 +10166,7 @@
(set (match_dup 0) (const_int 1)))])
(define_insn_and_split "*compare_scc"
- [(set (match_operand:SI 0 "s_register_operand" "=r,r")
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts,Ts")
(match_operator:SI 1 "arm_comparison_operator"
[(match_operand:SI 2 "s_register_operand" "r,r")
(match_operand:SI 3 "arm_add_operand" "rI,L")]))
@@ -9977,29 +10195,87 @@
;; Attempt to improve the sequence generated by the compare_scc splitters
;; not to use conditional execution.
+
+;; Rd = (eq (reg1) (const_int 0)) // ARMv5
+;; clz Rd, reg1
+;; lsr Rd, Rd, #5
(define_peephole2
[(set (reg:CC CC_REGNUM)
(compare:CC (match_operand:SI 1 "register_operand" "")
- (match_operand:SI 2 "arm_rhs_operand" "")))
+ (const_int 0)))
+ (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "") (const_int 0)))
+ (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))]
+ "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)"
+ [(set (match_dup 0) (clz:SI (match_dup 1)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))]
+)
+
+;; Rd = (eq (reg1) (const_int 0)) // !ARMv5
+;; negs Rd, reg1
+;; adc Rd, Rd, reg1
+(define_peephole2
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (const_int 0)))
(cond_exec (ne (reg:CC CC_REGNUM) (const_int 0))
(set (match_operand:SI 0 "register_operand" "") (const_int 0)))
(cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
(set (match_dup 0) (const_int 1)))
- (match_scratch:SI 3 "r")]
- "TARGET_32BIT"
+ (match_scratch:SI 2 "r")]
+ "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)"
[(parallel
[(set (reg:CC CC_REGNUM)
- (compare:CC (match_dup 1) (match_dup 2)))
- (set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))])
+ (compare:CC (const_int 0) (match_dup 1)))
+ (set (match_dup 2) (minus:SI (const_int 0) (match_dup 1)))])
+ (set (match_dup 0)
+ (plus:SI (plus:SI (match_dup 1) (match_dup 2))
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0))))]
+)
+
+;; Rd = (eq (reg1) (reg2/imm)) // ARMv5
+;; sub Rd, Reg1, reg2
+;; clz Rd, Rd
+;; lsr Rd, Rd, #5
+(define_peephole2
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "") (const_int 0)))
+ (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))]
+ "arm_arch5 && TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)"
+ [(set (match_dup 0) (minus:SI (match_dup 1) (match_dup 2)))
+ (set (match_dup 0) (clz:SI (match_dup 0)))
+ (set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 5)))]
+)
+
+
+;; Rd = (eq (reg1) (reg2/imm)) // !ARMv5
+;; sub T1, Reg1, reg2
+;; negs Rd, T1
+;; adc Rd, Rd, T1
+(define_peephole2
+ [(set (reg:CC CC_REGNUM)
+ (compare:CC (match_operand:SI 1 "register_operand" "")
+ (match_operand:SI 2 "arm_rhs_operand" "")))
+ (cond_exec (ne (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_operand:SI 0 "register_operand" "") (const_int 0)))
+ (cond_exec (eq (reg:CC CC_REGNUM) (const_int 0))
+ (set (match_dup 0) (const_int 1)))
+ (match_scratch:SI 3 "r")]
+ "TARGET_32BIT && peep2_regno_dead_p (3, CC_REGNUM)"
+ [(set (match_dup 3) (minus:SI (match_dup 1) (match_dup 2)))
(parallel
[(set (reg:CC CC_REGNUM)
(compare:CC (const_int 0) (match_dup 3)))
(set (match_dup 0) (minus:SI (const_int 0) (match_dup 3)))])
- (parallel
- [(set (match_dup 0)
- (plus:SI (plus:SI (match_dup 0) (match_dup 3))
- (geu:SI (reg:CC CC_REGNUM) (const_int 0))))
- (clobber (reg:CC CC_REGNUM))])])
+ (set (match_dup 0)
+ (plus:SI (plus:SI (match_dup 0) (match_dup 3))
+ (geu:SI (reg:CC CC_REGNUM) (const_int 0))))]
+)
(define_insn "*cond_move"
[(set (match_operand:SI 0 "s_register_operand" "=r,r,r")
@@ -10400,7 +10676,7 @@
)
(define_insn_and_split "*ior_scc_scc"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
(ior:SI (match_operator:SI 3 "arm_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "arm_add_operand" "rIL")])
@@ -10438,7 +10714,7 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")]))
(const_int 0)))
- (set (match_operand:SI 7 "s_register_operand" "=r")
+ (set (match_operand:SI 7 "s_register_operand" "=Ts")
(ior:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
"TARGET_32BIT"
@@ -10456,7 +10732,7 @@
(set_attr "length" "16")])
(define_insn_and_split "*and_scc_scc"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
(and:SI (match_operator:SI 3 "arm_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "arm_add_operand" "rIL")])
@@ -10496,7 +10772,7 @@
[(match_operand:SI 4 "s_register_operand" "r")
(match_operand:SI 5 "arm_add_operand" "rIL")]))
(const_int 0)))
- (set (match_operand:SI 7 "s_register_operand" "=r")
+ (set (match_operand:SI 7 "s_register_operand" "=Ts")
(and:SI (match_op_dup 3 [(match_dup 1) (match_dup 2)])
(match_op_dup 6 [(match_dup 4) (match_dup 5)])))]
"TARGET_32BIT"
@@ -10518,7 +10794,7 @@
;; need only zero the value if false (if true, then the value is already
;; correct).
(define_insn_and_split "*and_scc_scc_nodom"
- [(set (match_operand:SI 0 "s_register_operand" "=&r,&r,&r")
+ [(set (match_operand:SI 0 "s_register_operand" "=&Ts,&Ts,&Ts")
(and:SI (match_operator:SI 3 "arm_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "r,r,0")
(match_operand:SI 2 "arm_add_operand" "rIL,0,rIL")])
@@ -12095,6 +12371,7 @@
(const_int 0)])]
"TARGET_32BIT"
""
+[(set_attr "predicated" "yes")]
)
(define_insn "force_register_use"
@@ -12365,7 +12642,8 @@
false, true))"
"ldrd%?\t%0, %3, [%1, %2]"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb2_ldrd_base"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -12379,7 +12657,8 @@
operands[1], 0, false, true))"
"ldrd%?\t%0, %2, [%1]"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb2_ldrd_base_neg"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -12393,7 +12672,8 @@
operands[1], -4, false, true))"
"ldrd%?\t%0, %2, [%1, #-4]"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb2_strd"
[(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
@@ -12410,7 +12690,8 @@
false, false))"
"strd%?\t%2, %4, [%0, %1]"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb2_strd_base"
[(set (mem:SI (match_operand:SI 0 "s_register_operand" "rk"))
@@ -12424,7 +12705,8 @@
operands[0], 0, false, false))"
"strd%?\t%1, %2, [%0]"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb2_strd_base_neg"
[(set (mem:SI (plus:SI (match_operand:SI 0 "s_register_operand" "rk")
@@ -12438,7 +12720,8 @@
operands[0], -4, false, false))"
"strd%?\t%1, %2, [%0, #-4]"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
;; Load the load/store double peephole optimizations.
diff --git a/gcc/config/arm/arm.opt b/gcc/config/arm/arm.opt
index afb42421c06..b9ae2b09682 100644
--- a/gcc/config/arm/arm.opt
+++ b/gcc/config/arm/arm.opt
@@ -239,6 +239,10 @@ mword-relocations
Target Report Var(target_word_relocations) Init(TARGET_DEFAULT_WORD_RELOCATIONS)
Only generate absolute relocations on word sized values.
+mrestrict-it
+Target Report Var(arm_restrict_it) Init(2)
+Generate IT blocks appropriate for ARMv8.
+
mfix-cortex-m3-ldrd
Target Report Var(fix_cm3_ldrd) Init(2)
Avoid overlapping destination and address registers on LDRD instructions
diff --git a/gcc/config/arm/arm1020e.md b/gcc/config/arm/arm1020e.md
index ab65978aebc..94e8c35f839 100644
--- a/gcc/config/arm/arm1020e.md
+++ b/gcc/config/arm/arm1020e.md
@@ -96,7 +96,7 @@
;; until after the memory stage.
(define_insn_reservation "1020mult1" 2
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "smulxy,smulwy"))
+ (eq_attr "type" "smulxy,smulwy"))
"1020a_e,1020a_m,1020a_w")
;; The "smlaxy" and "smlawx" instructions require two iterations through
@@ -104,7 +104,7 @@
;; the execute stage.
(define_insn_reservation "1020mult2" 2
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
+ (eq_attr "type" "smlaxy,smlalxy,smlawx"))
"1020a_e*2,1020a_m,1020a_w")
;; The "smlalxy", "mul", and "mla" instructions require two iterations
@@ -112,7 +112,7 @@
;; the memory stage.
(define_insn_reservation "1020mult3" 3
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "smlalxy,mul,mla"))
+ (eq_attr "type" "smlalxy,mul,mla"))
"1020a_e*2,1020a_m,1020a_w")
;; The "muls" and "mlas" instructions loop in the execute stage for
@@ -120,7 +120,7 @@
;; available after three iterations.
(define_insn_reservation "1020mult4" 3
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "muls,mlas"))
+ (eq_attr "type" "muls,mlas"))
"1020a_e*4,1020a_m,1020a_w")
;; Long multiply instructions that produce two registers of
@@ -135,7 +135,7 @@
;; available after the memory cycle.
(define_insn_reservation "1020mult5" 4
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "umull,umlal,smull,smlal"))
+ (eq_attr "type" "umull,umlal,smull,smlal"))
"1020a_e*3,1020a_m,1020a_w")
;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
@@ -143,7 +143,7 @@
;; The value result is available after four iterations.
(define_insn_reservation "1020mult6" 4
(and (eq_attr "tune" "arm1020e,arm1022e")
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
"1020a_e*5,1020a_m,1020a_w")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/arm1026ejs.md b/gcc/config/arm/arm1026ejs.md
index 3fa4bd0c378..67b985ce68e 100644
--- a/gcc/config/arm/arm1026ejs.md
+++ b/gcc/config/arm/arm1026ejs.md
@@ -96,7 +96,7 @@
;; until after the memory stage.
(define_insn_reservation "mult1" 2
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "smulxy,smulwy"))
+ (eq_attr "type" "smulxy,smulwy"))
"a_e,a_m,a_w")
;; The "smlaxy" and "smlawx" instructions require two iterations through
@@ -104,7 +104,7 @@
;; the execute stage.
(define_insn_reservation "mult2" 2
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "smlaxy,smlalxy,smlawx"))
+ (eq_attr "type" "smlaxy,smlalxy,smlawx"))
"a_e*2,a_m,a_w")
;; The "smlalxy", "mul", and "mla" instructions require two iterations
@@ -112,7 +112,7 @@
;; the memory stage.
(define_insn_reservation "mult3" 3
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "smlalxy,mul,mla"))
+ (eq_attr "type" "smlalxy,mul,mla"))
"a_e*2,a_m,a_w")
;; The "muls" and "mlas" instructions loop in the execute stage for
@@ -120,7 +120,7 @@
;; available after three iterations.
(define_insn_reservation "mult4" 3
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "muls,mlas"))
+ (eq_attr "type" "muls,mlas"))
"a_e*4,a_m,a_w")
;; Long multiply instructions that produce two registers of
@@ -135,7 +135,7 @@
;; available after the memory cycle.
(define_insn_reservation "mult5" 4
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "umull,umlal,smull,smlal"))
+ (eq_attr "type" "umull,umlal,smull,smlal"))
"a_e*3,a_m,a_w")
;; The "umulls", "umlals", "smulls", and "smlals" instructions loop in
@@ -143,7 +143,7 @@
;; The value result is available after four iterations.
(define_insn_reservation "mult6" 4
(and (eq_attr "tune" "arm1026ejs")
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
"a_e*5,a_m,a_w")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/arm1136jfs.md b/gcc/config/arm/arm1136jfs.md
index b5802e03919..3030182acca 100644
--- a/gcc/config/arm/arm1136jfs.md
+++ b/gcc/config/arm/arm1136jfs.md
@@ -129,13 +129,13 @@
;; Multiply and multiply-accumulate results are available after four stages.
(define_insn_reservation "11_mult1" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "mul,mla"))
+ (eq_attr "type" "mul,mla"))
"e_1*2,e_2,e_3,e_wb")
;; The *S variants set the condition flags, which requires three more cycles.
(define_insn_reservation "11_mult2" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "muls,mlas"))
+ (eq_attr "type" "muls,mlas"))
"e_1*2,e_2,e_3,e_wb")
(define_bypass 3 "11_mult1,11_mult2"
@@ -160,13 +160,13 @@
;; the two multiply-accumulate instructions.
(define_insn_reservation "11_mult3" 5
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "smull,umull,smlal,umlal"))
+ (eq_attr "type" "smull,umull,smlal,umlal"))
"e_1*3,e_2,e_3,e_wb*2")
;; The *S variants set the condition flags, which requires three more cycles.
(define_insn_reservation "11_mult4" 5
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "smulls,umulls,smlals,umlals"))
+ (eq_attr "type" "smulls,umulls,smlals,umlals"))
"e_1*3,e_2,e_3,e_wb*2")
(define_bypass 4 "11_mult3,11_mult4"
@@ -190,7 +190,8 @@
;; cycles.
(define_insn_reservation "11_mult5" 3
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,smusd,smusdx,smlsd,smlsdx"))
+ (eq_attr "type" "smulxy,smlaxy,smulwy,smlawy,smuad,smuadx,smlad,smladx,\
+ smusd,smusdx,smlsd,smlsdx"))
"e_1,e_2,e_3,e_wb")
(define_bypass 2 "11_mult5"
@@ -211,14 +212,14 @@
;; The same idea, then the 32-bit result is added to a 64-bit quantity.
(define_insn_reservation "11_mult6" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "smlalxy"))
+ (eq_attr "type" "smlalxy"))
"e_1*2,e_2,e_3,e_wb*2")
;; Signed 32x32 multiply, then the most significant 32 bits are extracted
;; and are available after the memory stage.
(define_insn_reservation "11_mult7" 4
(and (eq_attr "tune" "arm1136js,arm1136jfs")
- (eq_attr "insn" "smmul,smmulr"))
+ (eq_attr "type" "smmul,smmulr"))
"e_1*2,e_2,e_3,e_wb")
(define_bypass 3 "11_mult6,11_mult7"
diff --git a/gcc/config/arm/arm926ejs.md b/gcc/config/arm/arm926ejs.md
index 1fc82d3db7f..4db404e766f 100644
--- a/gcc/config/arm/arm926ejs.md
+++ b/gcc/config/arm/arm926ejs.md
@@ -81,32 +81,32 @@
(define_insn_reservation "9_mult1" 3
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "smlalxy,mul,mla"))
+ (eq_attr "type" "smlalxy,mul,mla"))
"e*2,m,w")
(define_insn_reservation "9_mult2" 4
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "muls,mlas"))
+ (eq_attr "type" "muls,mlas"))
"e*3,m,w")
(define_insn_reservation "9_mult3" 4
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "umull,umlal,smull,smlal"))
+ (eq_attr "type" "umull,umlal,smull,smlal"))
"e*3,m,w")
(define_insn_reservation "9_mult4" 5
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "umulls,umlals,smulls,smlals"))
+ (eq_attr "type" "umulls,umlals,smulls,smlals"))
"e*4,m,w")
(define_insn_reservation "9_mult5" 2
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "smulxy,smlaxy,smlawx"))
+ (eq_attr "type" "smulxy,smlaxy,smlawx"))
"e,m,w")
(define_insn_reservation "9_mult6" 3
(and (eq_attr "tune" "arm926ejs")
- (eq_attr "insn" "smlalxy"))
+ (eq_attr "type" "smlalxy"))
"e*2,m,w")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/constraints.md b/gcc/config/arm/constraints.md
index 7e7b3e69e0a..251d4975b7c 100644
--- a/gcc/config/arm/constraints.md
+++ b/gcc/config/arm/constraints.md
@@ -260,6 +260,18 @@
(and (match_code "const_int")
(match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, AND)")))
+(define_constraint "Df"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn iordi."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, IOR)")))
+
+(define_constraint "Dg"
+ "@internal
+ In ARM/Thumb-2 state a const_int that can be used by insn xordi."
+ (and (match_code "const_int")
+ (match_test "TARGET_32BIT && const_ok_for_dimode_op (ival, XOR)")))
+
(define_constraint "Di"
"@internal
In ARM/Thumb-2 state a const_int or const_double where both the high
@@ -317,6 +329,9 @@
(and (match_code "const_double")
(match_test "TARGET_32BIT && TARGET_VFP && vfp3_const_double_for_fract_bits (op)")))
+(define_register_constraint "Ts" "(arm_restrict_it) ? LO_REGS : GENERAL_REGS"
+ "For arm_restrict_it the core registers @code{r0}-@code{r7}. GENERAL_REGS otherwise.")
+
(define_memory_constraint "Ua"
"@internal
An address valid for loading/storing register exclusive"
@@ -346,21 +361,21 @@
In ARM/Thumb-2 state a valid address for Neon doubleword vector
load/store instructions."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 0, true)")))
(define_memory_constraint "Um"
"@internal
In ARM/Thumb-2 state a valid address for Neon element and structure
load/store instructions."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)")))
(define_memory_constraint "Us"
"@internal
In ARM/Thumb-2 state a valid address for non-offset loads/stores of
quad-word values in four ARM registers."
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 1, true)")))
(define_memory_constraint "Uq"
"@internal
diff --git a/gcc/config/arm/cortex-a15.md b/gcc/config/arm/cortex-a15.md
index f0c1985fab5..981d055c668 100644
--- a/gcc/config/arm/cortex-a15.md
+++ b/gcc/config/arm/cortex-a15.md
@@ -87,28 +87,26 @@
;; 32-bit multiplies
(define_insn_reservation "cortex_a15_mult32" 3
(and (eq_attr "tune" "cortexa15")
- (and (eq_attr "type" "mult")
- (and (eq_attr "neon_type" "none")
- (eq_attr "mul64" "no"))))
+ (and (eq_attr "mul32" "yes")
+ (eq_attr "neon_type" "none")))
"ca15_issue1,ca15_mx")
;; 64-bit multiplies
(define_insn_reservation "cortex_a15_mult64" 4
(and (eq_attr "tune" "cortexa15")
- (and (eq_attr "type" "mult")
- (and (eq_attr "neon_type" "none")
- (eq_attr "mul64" "yes"))))
+ (and (eq_attr "mul64" "yes")
+ (eq_attr "neon_type" "none")))
"ca15_issue1,ca15_mx*2")
;; Integer divide
(define_insn_reservation "cortex_a15_udiv" 9
(and (eq_attr "tune" "cortexa15")
- (eq_attr "insn" "udiv"))
+ (eq_attr "type" "udiv"))
"ca15_issue1,ca15_mx")
(define_insn_reservation "cortex_a15_sdiv" 10
(and (eq_attr "tune" "cortexa15")
- (eq_attr "insn" "sdiv"))
+ (eq_attr "type" "sdiv"))
"ca15_issue1,ca15_mx")
;; Block all issue pipes for a cycle
diff --git a/gcc/config/arm/cortex-a5.md b/gcc/config/arm/cortex-a5.md
index 41a2c37e8fa..963d5babd7b 100644
--- a/gcc/config/arm/cortex-a5.md
+++ b/gcc/config/arm/cortex-a5.md
@@ -80,7 +80,8 @@
(define_insn_reservation "cortex_a5_mul" 2
(and (eq_attr "tune" "cortexa5")
- (eq_attr "type" "mult"))
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
"cortex_a5_ex1")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/cortex-a53.md b/gcc/config/arm/cortex-a53.md
index b6a291e017b..e67fe55ecd3 100644
--- a/gcc/config/arm/cortex-a53.md
+++ b/gcc/config/arm/cortex-a53.md
@@ -89,7 +89,8 @@
(define_insn_reservation "cortex_a53_mul" 3
(and (eq_attr "tune" "cortexa53")
- (eq_attr "type" "mult"))
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
"cortex_a53_single_issue")
;; A multiply with a single-register result or an MLA, followed by an
@@ -103,12 +104,12 @@
;; Punt with a high enough latency for divides.
(define_insn_reservation "cortex_a53_udiv" 8
(and (eq_attr "tune" "cortexa53")
- (eq_attr "insn" "udiv"))
+ (eq_attr "type" "udiv"))
"(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*7")
(define_insn_reservation "cortex_a53_sdiv" 9
(and (eq_attr "tune" "cortexa53")
- (eq_attr "insn" "sdiv"))
+ (eq_attr "type" "sdiv"))
"(cortex_a53_slot0+cortex_a53_idiv),cortex_a53_idiv*8")
diff --git a/gcc/config/arm/cortex-a7.md b/gcc/config/arm/cortex-a7.md
index 3750f74f2c6..960174fb90a 100644
--- a/gcc/config/arm/cortex-a7.md
+++ b/gcc/config/arm/cortex-a7.md
@@ -127,8 +127,9 @@
(define_insn_reservation "cortex_a7_mul" 2
(and (eq_attr "tune" "cortexa7")
- (and (eq_attr "type" "mult")
- (eq_attr "neon_type" "none")))
+ (and (eq_attr "neon_type" "none")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes"))))
"cortex_a7_both")
;; Forward the result of a multiply operation to the accumulator
@@ -140,7 +141,7 @@
;; The latency depends on the operands, so we use an estimate here.
(define_insn_reservation "cortex_a7_idiv" 5
(and (eq_attr "tune" "cortexa7")
- (eq_attr "insn" "udiv,sdiv"))
+ (eq_attr "type" "udiv,sdiv"))
"cortex_a7_both*5")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/cortex-a8.md b/gcc/config/arm/cortex-a8.md
index bd1132a18c3..8d3e98734ce 100644
--- a/gcc/config/arm/cortex-a8.md
+++ b/gcc/config/arm/cortex-a8.md
@@ -139,22 +139,22 @@
(define_insn_reservation "cortex_a8_mul" 6
(and (eq_attr "tune" "cortexa8")
- (eq_attr "insn" "mul,smulxy,smmul"))
+ (eq_attr "type" "mul,smulxy,smmul"))
"cortex_a8_multiply_2")
(define_insn_reservation "cortex_a8_mla" 6
(and (eq_attr "tune" "cortexa8")
- (eq_attr "insn" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
+ (eq_attr "type" "mla,smlaxy,smlawy,smmla,smlad,smlsd"))
"cortex_a8_multiply_2")
(define_insn_reservation "cortex_a8_mull" 7
(and (eq_attr "tune" "cortexa8")
- (eq_attr "insn" "smull,umull,smlal,umlal,umaal,smlalxy"))
+ (eq_attr "type" "smull,umull,smlal,umlal,umaal,smlalxy"))
"cortex_a8_multiply_3")
(define_insn_reservation "cortex_a8_smulwy" 5
(and (eq_attr "tune" "cortexa8")
- (eq_attr "insn" "smulwy,smuad,smusd"))
+ (eq_attr "type" "smulwy,smuad,smusd"))
"cortex_a8_multiply")
;; smlald and smlsld are multiply-accumulate instructions but do not
@@ -162,7 +162,7 @@
;; cannot go in cortex_a8_mla above. (See below for bypass details.)
(define_insn_reservation "cortex_a8_smlald" 6
(and (eq_attr "tune" "cortexa8")
- (eq_attr "insn" "smlald,smlsld"))
+ (eq_attr "type" "smlald,smlsld"))
"cortex_a8_multiply_2")
;; A multiply with a single-register result or an MLA, followed by an
diff --git a/gcc/config/arm/cortex-a9.md b/gcc/config/arm/cortex-a9.md
index abbaa8d4e1e..05c114dc366 100644
--- a/gcc/config/arm/cortex-a9.md
+++ b/gcc/config/arm/cortex-a9.md
@@ -130,29 +130,29 @@ cortex_a9_p1_e2 + cortex_a9_p0_e1 + cortex_a9_p1_e1")
;; We get 16*16 multiply / mac results in 3 cycles.
(define_insn_reservation "cortex_a9_mult16" 3
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "smulxy"))
+ (eq_attr "type" "smulxy"))
"cortex_a9_mult16")
;; The 16*16 mac is slightly different in that it
;; reserves M1 and M2 in the same cycle.
(define_insn_reservation "cortex_a9_mac16" 3
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "smlaxy"))
+ (eq_attr "type" "smlaxy"))
"cortex_a9_mac16")
(define_insn_reservation "cortex_a9_multiply" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mul,smmul,smmulr"))
+ (eq_attr "type" "mul,smmul,smmulr"))
"cortex_a9_mult")
(define_insn_reservation "cortex_a9_mac" 4
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "mla,smmla"))
+ (eq_attr "type" "mla,smmla"))
"cortex_a9_mac")
(define_insn_reservation "cortex_a9_multiply_long" 5
(and (eq_attr "tune" "cortexa9")
- (eq_attr "insn" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
+ (eq_attr "type" "smull,umull,smulls,umulls,smlal,smlals,umlal,umlals"))
"cortex_a9_mult_long")
;; An instruction with a result in E2 can be forwarded
diff --git a/gcc/config/arm/cortex-m4.md b/gcc/config/arm/cortex-m4.md
index 47b03644f73..dc3a3299572 100644
--- a/gcc/config/arm/cortex-m4.md
+++ b/gcc/config/arm/cortex-m4.md
@@ -31,7 +31,10 @@
;; ALU and multiply is one cycle.
(define_insn_reservation "cortex_m4_alu" 1
(and (eq_attr "tune" "cortexm4")
- (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,alu_shift,alu_shift_reg,mult"))
+ (ior (eq_attr "type" "alu_reg,simple_alu_imm,simple_alu_shift,\
+ alu_shift,alu_shift_reg")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes"))))
"cortex_m4_ex")
;; Byte, half-word and word load is two cycles.
diff --git a/gcc/config/arm/cortex-r4.md b/gcc/config/arm/cortex-r4.md
index 84e4a3a1e60..6d37079f2b3 100644
--- a/gcc/config/arm/cortex-r4.md
+++ b/gcc/config/arm/cortex-r4.md
@@ -128,32 +128,32 @@
(define_insn_reservation "cortex_r4_mul_4" 4
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "mul,smmul"))
+ (eq_attr "type" "mul,smmul"))
"cortex_r4_mul_2")
(define_insn_reservation "cortex_r4_mul_3" 3
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "smulxy,smulwy,smuad,smusd"))
+ (eq_attr "type" "smulxy,smulwy,smuad,smusd"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_mla_4" 4
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "mla,smmla"))
+ (eq_attr "type" "mla,smmla"))
"cortex_r4_mul_2")
(define_insn_reservation "cortex_r4_mla_3" 3
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "smlaxy,smlawy,smlad,smlsd"))
+ (eq_attr "type" "smlaxy,smlawy,smlad,smlsd"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_smlald" 3
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "smlald,smlsld"))
+ (eq_attr "type" "smlald,smlsld"))
"cortex_r4_mul")
(define_insn_reservation "cortex_r4_mull" 4
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "smull,umull,umlal,umaal"))
+ (eq_attr "type" "smull,umull,umlal,umaal"))
"cortex_r4_mul_2")
;; A multiply or an MLA with a single-register result, followed by an
@@ -196,12 +196,12 @@
;; This gives a latency of nine for udiv and ten for sdiv.
(define_insn_reservation "cortex_r4_udiv" 9
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "udiv"))
+ (eq_attr "type" "udiv"))
"cortex_r4_div_9")
(define_insn_reservation "cortex_r4_sdiv" 10
(and (eq_attr "tune_cortexr4" "yes")
- (eq_attr "insn" "sdiv"))
+ (eq_attr "type" "sdiv"))
"cortex_r4_div_10")
;; Branches. We assume correct prediction.
diff --git a/gcc/config/arm/fa526.md b/gcc/config/arm/fa526.md
index e03894aa61c..efc6a1db959 100644
--- a/gcc/config/arm/fa526.md
+++ b/gcc/config/arm/fa526.md
@@ -76,12 +76,12 @@
(define_insn_reservation "526_mult1" 2
(and (eq_attr "tune" "fa526")
- (eq_attr "insn" "smlalxy,smulxy,smlaxy,smlalxy"))
+ (eq_attr "type" "smlalxy,smulxy,smlaxy,smlalxy"))
"fa526_core")
(define_insn_reservation "526_mult2" 5
(and (eq_attr "tune" "fa526")
- (eq_attr "insn" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
+ (eq_attr "type" "mul,mla,muls,mlas,umull,umlal,smull,smlal,umulls,\
umlals,smulls,smlals,smlawx"))
"fa526_core*4")
diff --git a/gcc/config/arm/fa606te.md b/gcc/config/arm/fa606te.md
index d53617a78e3..dec26c5c3ac 100644
--- a/gcc/config/arm/fa606te.md
+++ b/gcc/config/arm/fa606te.md
@@ -71,22 +71,22 @@
(define_insn_reservation "606te_mult1" 2
(and (eq_attr "tune" "fa606te")
- (eq_attr "insn" "smlalxy"))
+ (eq_attr "type" "smlalxy"))
"fa606te_core")
(define_insn_reservation "606te_mult2" 3
(and (eq_attr "tune" "fa606te")
- (eq_attr "insn" "smlaxy,smulxy,smulwy,smlawy"))
+ (eq_attr "type" "smlaxy,smulxy,smulwy,smlawy"))
"fa606te_core*2")
(define_insn_reservation "606te_mult3" 4
(and (eq_attr "tune" "fa606te")
- (eq_attr "insn" "mul,mla,muls,mlas"))
+ (eq_attr "type" "mul,mla,muls,mlas"))
"fa606te_core*3")
(define_insn_reservation "606te_mult4" 5
(and (eq_attr "tune" "fa606te")
- (eq_attr "insn" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
+ (eq_attr "type" "umull,umlal,smull,smlal,umulls,umlals,smulls,smlals"))
"fa606te_core*4")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa626te.md b/gcc/config/arm/fa626te.md
index 690cb46d878..818ad607b47 100644
--- a/gcc/config/arm/fa626te.md
+++ b/gcc/config/arm/fa626te.md
@@ -82,22 +82,22 @@
(define_insn_reservation "626te_mult1" 2
(and (eq_attr "tune" "fa626,fa626te")
- (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
+ (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy"))
"fa626te_core")
(define_insn_reservation "626te_mult2" 2
(and (eq_attr "tune" "fa626,fa626te")
- (eq_attr "insn" "mul,mla"))
+ (eq_attr "type" "mul,mla"))
"fa626te_core")
(define_insn_reservation "626te_mult3" 3
(and (eq_attr "tune" "fa626,fa626te")
- (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
+ (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
"fa626te_core*2")
(define_insn_reservation "626te_mult4" 4
(and (eq_attr "tune" "fa626,fa626te")
- (eq_attr "insn" "smulls,smlals,umulls,umlals"))
+ (eq_attr "type" "smulls,smlals,umulls,umlals"))
"fa626te_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/fa726te.md b/gcc/config/arm/fa726te.md
index 07ab018f667..8790b035aa5 100644
--- a/gcc/config/arm/fa726te.md
+++ b/gcc/config/arm/fa726te.md
@@ -115,7 +115,7 @@
(define_insn_reservation "726te_mult_op" 3
(and (eq_attr "tune" "fa726te")
- (eq_attr "insn" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
+ (eq_attr "type" "smlalxy,mul,mla,muls,mlas,umull,umlal,smull,smlal,\
umulls,umlals,smulls,smlals,smlawx,smulxy,smlaxy"))
"fa726te_issue+fa726te_mac_pipe")
diff --git a/gcc/config/arm/fmp626.md b/gcc/config/arm/fmp626.md
index 8691450c3c7..f3b7dadcba2 100644
--- a/gcc/config/arm/fmp626.md
+++ b/gcc/config/arm/fmp626.md
@@ -77,22 +77,22 @@
(define_insn_reservation "mp626_mult1" 2
(and (eq_attr "tune" "fmp626")
- (eq_attr "insn" "smulwy,smlawy,smulxy,smlaxy"))
+ (eq_attr "type" "smulwy,smlawy,smulxy,smlaxy"))
"fmp626_core")
(define_insn_reservation "mp626_mult2" 2
(and (eq_attr "tune" "fmp626")
- (eq_attr "insn" "mul,mla"))
+ (eq_attr "type" "mul,mla"))
"fmp626_core")
(define_insn_reservation "mp626_mult3" 3
(and (eq_attr "tune" "fmp626")
- (eq_attr "insn" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
+ (eq_attr "type" "muls,mlas,smull,smlal,umull,umlal,smlalxy,smlawx"))
"fmp626_core*2")
(define_insn_reservation "mp626_mult4" 4
(and (eq_attr "tune" "fmp626")
- (eq_attr "insn" "smulls,smlals,umulls,umlals"))
+ (eq_attr "type" "smulls,smlals,umulls,umlals"))
"fmp626_core*3")
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index b3ad42b376f..d84929f3d1f 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -496,3 +496,11 @@
(define_int_attr nvrint_variant [(UNSPEC_NVRINTZ "z") (UNSPEC_NVRINTP "p")
(UNSPEC_NVRINTA "a") (UNSPEC_NVRINTM "m")
(UNSPEC_NVRINTX "x") (UNSPEC_NVRINTN "n")])
+;; Both kinds of return insn.
+(define_code_iterator returns [return simple_return])
+(define_code_attr return_str [(return "") (simple_return "simple_")])
+(define_code_attr return_simple_p [(return "false") (simple_return "true")])
+(define_code_attr return_cond_false [(return " && USE_RETURN_INSN (FALSE)")
+ (simple_return " && use_simple_return_p ()")])
+(define_code_attr return_cond_true [(return " && USE_RETURN_INSN (TRUE)")
+ (simple_return " && use_simple_return_p ()")])
diff --git a/gcc/config/arm/ldmstm.md b/gcc/config/arm/ldmstm.md
index 8ebdfc81761..ad137d492e4 100644
--- a/gcc/config/arm/ldmstm.md
+++ b/gcc/config/arm/ldmstm.md
@@ -37,7 +37,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(ia%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm4_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -74,7 +75,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"ldm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm4_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -108,7 +110,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -125,7 +128,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(ia%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm4_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -302,7 +306,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(db%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm4_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -323,7 +328,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"ldm%(db%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "load4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_db"
[(match_parallel 0 "store_multiple_operation"
@@ -338,7 +344,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%5, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm4_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -355,7 +362,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 5"
"stm%(db%)\t%5!, {%1, %2, %3, %4}"
[(set_attr "type" "store4")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
@@ -477,7 +485,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(ia%)\t%4, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm3_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -508,7 +517,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm3_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -537,7 +547,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -552,7 +563,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(ia%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm3_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -704,7 +716,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(db%)\t%4, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm3_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -722,7 +735,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"ldm%(db%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "load3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_db"
[(match_parallel 0 "store_multiple_operation"
@@ -735,7 +749,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%4, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm3_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -750,7 +765,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 4"
"stm%(db%)\t%4!, {%1, %2, %3}"
[(set_attr "type" "store3")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
@@ -855,7 +871,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"ldm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm2_ia"
[(match_parallel 0 "load_multiple_operation"
@@ -880,7 +897,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_ldm2_ia_update"
[(match_parallel 0 "load_multiple_operation"
@@ -904,7 +922,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(ia%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -917,7 +936,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(ia%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*thumb_stm2_ia_update"
[(match_parallel 0 "store_multiple_operation"
@@ -1044,7 +1064,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"ldm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*ldm2_db_update"
[(match_parallel 0 "load_multiple_operation"
@@ -1059,7 +1080,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"ldm%(db%)\t%3!, {%1, %2}"
[(set_attr "type" "load2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_db"
[(match_parallel 0 "store_multiple_operation"
@@ -1070,7 +1092,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 2"
"stm%(db%)\t%3, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "*stm2_db_update"
[(match_parallel 0 "store_multiple_operation"
@@ -1083,7 +1106,8 @@
"TARGET_32BIT && XVECLEN (operands[0], 0) == 3"
"stm%(db%)\t%3!, {%1, %2}"
[(set_attr "type" "store2")
- (set_attr "predicable" "yes")])
+ (set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_peephole2
[(set (match_operand:SI 0 "s_register_operand" "")
diff --git a/gcc/config/arm/marvell-pj4.md b/gcc/config/arm/marvell-pj4.md
index 39f4c584515..4004fa59409 100644
--- a/gcc/config/arm/marvell-pj4.md
+++ b/gcc/config/arm/marvell-pj4.md
@@ -95,10 +95,14 @@
"pj4_ir_mul,pj4_ir_div,pj4_core_to_vfp")
(define_insn_reservation "pj4_ir_mul" 3
- (and (eq_attr "tune" "marvell_pj4") (eq_attr "type" "mult")) "pj4_is,pj4_mul,nothing*2,pj4_cp")
+ (and (eq_attr "tune" "marvell_pj4")
+ (ior (eq_attr "mul32" "yes")
+ (eq_attr "mul64" "yes")))
+ "pj4_is,pj4_mul,nothing*2,pj4_cp")
(define_insn_reservation "pj4_ir_div" 20
- (and (eq_attr "tune" "marvell_pj4") (eq_attr "insn" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp")
+ (and (eq_attr "tune" "marvell_pj4")
+ (eq_attr "type" "udiv,sdiv")) "pj4_is,pj4_div*19,pj4_cp")
;; Branches and calls.
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index f91a6f7d08b..2761adb286a 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -241,8 +241,8 @@
})
(define_expand "movmisalign<mode>"
- [(set (match_operand:VDQX 0 "neon_struct_or_register_operand")
- (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_or_register_operand")]
+ [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
+ (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
UNSPEC_MISALIGNED_ACCESS))]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
{
@@ -255,7 +255,7 @@
})
(define_insn "*movmisalign<mode>_neon_store"
- [(set (match_operand:VDX 0 "neon_struct_operand" "=Um")
+ [(set (match_operand:VDX 0 "neon_permissive_struct_operand" "=Um")
(unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
UNSPEC_MISALIGNED_ACCESS))]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
@@ -263,15 +263,16 @@
[(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
(define_insn "*movmisalign<mode>_neon_load"
- [(set (match_operand:VDX 0 "s_register_operand" "=w")
- (unspec:VDX [(match_operand:VDX 1 "neon_struct_operand" " Um")]
+ [(set (match_operand:VDX 0 "s_register_operand" "=w")
+ (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
+ " Um")]
UNSPEC_MISALIGNED_ACCESS))]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
"vld1.<V_sz_elem>\t{%P0}, %A1"
[(set_attr "neon_type" "neon_vld1_1_2_regs")])
(define_insn "*movmisalign<mode>_neon_store"
- [(set (match_operand:VQX 0 "neon_struct_operand" "=Um")
+ [(set (match_operand:VQX 0 "neon_permissive_struct_operand" "=Um")
(unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
UNSPEC_MISALIGNED_ACCESS))]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
@@ -279,8 +280,9 @@
[(set_attr "neon_type" "neon_vst1_1_2_regs_vst2_2_regs")])
(define_insn "*movmisalign<mode>_neon_load"
- [(set (match_operand:VQX 0 "s_register_operand" "=w")
- (unspec:VQX [(match_operand:VQX 1 "neon_struct_operand" " Um")]
+ [(set (match_operand:VQX 0 "s_register_operand" "=w")
+ (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
+ " Um")]
UNSPEC_MISALIGNED_ACCESS))]
"TARGET_NEON && !BYTES_BIG_ENDIAN"
"vld1.<V_sz_elem>\t{%q0}, %A1"
@@ -679,29 +681,6 @@
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "iordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r,?w,?w")
- (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r,w,0")
- (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r,w,Dl")))]
- "TARGET_NEON"
-{
- switch (which_alternative)
- {
- case 0: /* fall through */
- case 4: return "vorr\t%P0, %P1, %P2";
- case 1: /* fall through */
- case 5: return neon_output_logic_immediate ("vorr", &operands[2],
- DImode, 0, VALID_NEON_QREG_MODE (DImode));
- case 2: return "#";
- case 3: return "#";
- default: gcc_unreachable ();
- }
-}
- [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*,neon_int_1,neon_int_1")
- (set_attr "length" "*,*,8,8,*,*")
- (set_attr "arch" "neon_for_64bits,neon_for_64bits,*,*,avoid_neon_for_64bits,avoid_neon_for_64bits")]
-)
-
;; The concrete forms of the Neon immediate-logic instructions are vbic and
;; vorr. We support the pseudo-instruction vand instead, because that
;; corresponds to the canonical form the middle-end expects to use for
@@ -805,21 +784,6 @@
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "xordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w")
- (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r,w")
- (match_operand:DI 2 "s_register_operand" "w,r,r,w")))]
- "TARGET_NEON"
- "@
- veor\t%P0, %P1, %P2
- #
- #
- veor\t%P0, %P1, %P2"
- [(set_attr "neon_type" "neon_int_1,*,*,neon_int_1")
- (set_attr "length" "*,8,8,*")
- (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits")]
-)
-
(define_insn "one_cmpl<mode>2"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
@@ -5617,7 +5581,7 @@
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_NEON"
{
- emit_insn (gen_ior<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ emit_insn (gen_ior<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
@@ -5628,7 +5592,7 @@
(match_operand:SI 3 "immediate_operand" "")]
"TARGET_NEON"
{
- emit_insn (gen_xor<mode>3<V_suf64> (operands[0], operands[1], operands[2]));
+ emit_insn (gen_xor<mode>3 (operands[0], operands[1], operands[2]));
DONE;
})
diff --git a/gcc/config/arm/predicates.md b/gcc/config/arm/predicates.md
index 92de9fe8bd9..f4a4515fa39 100644
--- a/gcc/config/arm/predicates.md
+++ b/gcc/config/arm/predicates.md
@@ -42,6 +42,17 @@
(ior (match_operand 0 "imm_for_neon_inv_logic_operand")
(match_operand 0 "s_register_operand")))
+(define_predicate "imm_for_neon_logic_operand"
+ (match_code "const_vector")
+{
+ return (TARGET_NEON
+ && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
+})
+
+(define_predicate "neon_logic_op2"
+ (ior (match_operand 0 "imm_for_neon_logic_operand")
+ (match_operand 0 "s_register_operand")))
+
;; Any hard register.
(define_predicate "arm_hard_register_operand"
(match_code "reg")
@@ -162,6 +173,17 @@
(match_test "const_ok_for_dimode_op (INTVAL (op), AND)"))
(match_operand 0 "neon_inv_logic_op2")))
+(define_predicate "arm_iordi_operand_neon"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_int")
+ (match_test "const_ok_for_dimode_op (INTVAL (op), IOR)"))
+ (match_operand 0 "neon_logic_op2")))
+
+(define_predicate "arm_xordi_operand"
+ (ior (match_operand 0 "s_register_operand")
+ (and (match_code "const_int")
+ (match_test "const_ok_for_dimode_op (INTVAL (op), XOR)"))))
+
(define_predicate "arm_adddi_operand"
(ior (match_operand 0 "s_register_operand")
(and (match_code "const_int")
@@ -299,6 +321,12 @@
|| maybe_get_arm_condition_code (op) == ARM_NE
|| maybe_get_arm_condition_code (op) == ARM_VC")))
+(define_special_predicate "arm_cond_move_operator"
+ (if_then_else (match_test "arm_restrict_it")
+ (and (match_test "TARGET_FPU_ARMV8")
+ (match_operand 0 "arm_vsel_comparison_operator"))
+ (match_operand 0 "expandable_comparison_operator")))
+
(define_special_predicate "noov_comparison_operator"
(match_code "lt,ge,eq,ne"))
@@ -535,17 +563,6 @@
(ior (match_operand 0 "s_register_operand")
(match_operand 0 "imm_for_neon_rshift_operand")))
-(define_predicate "imm_for_neon_logic_operand"
- (match_code "const_vector")
-{
- return (TARGET_NEON
- && neon_immediate_valid_for_logic (op, mode, 0, NULL, NULL));
-})
-
-(define_predicate "neon_logic_op2"
- (ior (match_operand 0 "imm_for_neon_logic_operand")
- (match_operand 0 "s_register_operand")))
-
;; Predicates for named expanders that overlap multiple ISAs.
(define_predicate "cmpdi_operand"
@@ -623,10 +640,14 @@
(define_predicate "neon_struct_operand"
(and (match_code "mem")
- (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2)")))
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, true)")))
+
+(define_predicate "neon_permissive_struct_operand"
+ (and (match_code "mem")
+ (match_test "TARGET_32BIT && neon_vector_mem_operand (op, 2, false)")))
-(define_predicate "neon_struct_or_register_operand"
- (ior (match_operand 0 "neon_struct_operand")
+(define_predicate "neon_perm_struct_or_reg_operand"
+ (ior (match_operand 0 "neon_permissive_struct_operand")
(match_operand 0 "s_register_operand")))
(define_special_predicate "add_operator"
diff --git a/gcc/config/arm/sync.md b/gcc/config/arm/sync.md
index 980234836c9..8f7bd71c317 100644
--- a/gcc/config/arm/sync.md
+++ b/gcc/config/arm/sync.md
@@ -124,7 +124,8 @@
UNSPEC_LL))]
"TARGET_HAVE_LDREXD && ARM_DOUBLEWORD_ALIGN"
"ldrexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_expand "atomic_compare_and_swap<mode>"
[(match_operand:SI 0 "s_register_operand" "") ;; bool out
@@ -361,7 +362,8 @@
VUNSPEC_LL)))]
"TARGET_HAVE_LDREXBH"
"ldrex<sync_sfx>%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -371,7 +373,8 @@
VUNSPEC_LAX)))]
"TARGET_HAVE_LDACQ"
"ldaex<sync_sfx>%?\\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_exclusivesi"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -380,7 +383,8 @@
VUNSPEC_LL))]
"TARGET_HAVE_LDREX"
"ldrex%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusivesi"
[(set (match_operand:SI 0 "s_register_operand" "=r")
@@ -389,7 +393,8 @@
VUNSPEC_LAX))]
"TARGET_HAVE_LDACQ"
"ldaex%?\t%0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_exclusivedi"
[(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -398,7 +403,8 @@
VUNSPEC_LL))]
"TARGET_HAVE_LDREXD"
"ldrexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_load_acquire_exclusivedi"
[(set (match_operand:DI 0 "s_register_operand" "=r")
@@ -407,7 +413,8 @@
VUNSPEC_LAX))]
"TARGET_HAVE_LDACQ && ARM_DOUBLEWORD_ALIGN"
"ldaexd%?\t%0, %H0, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -431,7 +438,8 @@
}
return "strex<sync_sfx>%?\t%0, %2, %C1";
}
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_release_exclusivedi"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -448,7 +456,8 @@
operands[3] = gen_rtx_REG (SImode, REGNO (value) + 1);
return "stlexd%?\t%0, %2, %3, %C1";
}
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
(define_insn "arm_store_release_exclusive<mode>"
[(set (match_operand:SI 0 "s_register_operand" "=&r")
@@ -459,4 +468,5 @@
VUNSPEC_SLX))]
"TARGET_HAVE_LDACQ"
"stlex<sync_sfx>%?\t%0, %2, %C1"
- [(set_attr "predicable" "yes")])
+ [(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")])
diff --git a/gcc/config/arm/t-arm b/gcc/config/arm/t-arm
index fe075e5862a..246f0f5b540 100644
--- a/gcc/config/arm/t-arm
+++ b/gcc/config/arm/t-arm
@@ -78,8 +78,8 @@ $(srcdir)/config/arm/arm-tables.opt: $(srcdir)/config/arm/genopt.sh \
$(SHELL) $(srcdir)/config/arm/genopt.sh $(srcdir)/config/arm > \
$(srcdir)/config/arm/arm-tables.opt
-arm.o: $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
- $(RTL_H) $(TREE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
+arm.o: $(srcdir)/config/arm/arm.c $(CONFIG_H) $(SYSTEM_H) coretypes.h $(TM_H) \
+ $(RTL_H) $(TREE_H) $(HASH_TABLE_H) $(OBSTACK_H) $(REGS_H) hard-reg-set.h \
insn-config.h conditions.h output.h dumpfile.h \
$(INSN_ATTR_H) $(FLAGS_H) reload.h $(FUNCTION_H) \
$(EXPR_H) $(OPTABS_H) $(RECOG_H) $(CGRAPH_H) \
diff --git a/gcc/config/arm/thumb2.md b/gcc/config/arm/thumb2.md
index ca4eedb037b..cd5837480b8 100644
--- a/gcc/config/arm/thumb2.md
+++ b/gcc/config/arm/thumb2.md
@@ -267,8 +267,8 @@
;; regs. The high register alternatives are not taken into account when
;; choosing register preferences in order to reflect their expense.
(define_insn "*thumb2_movsi_insn"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r,l ,*hk,m,*m")
- (match_operand:SI 1 "general_operand" "rk ,I,K,j,mi,*mi,l,*hk"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r,l ,*hk,m,*m")
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk"))]
"TARGET_THUMB2 && ! TARGET_IWMMXT
&& !(TARGET_HARD_FLOAT && TARGET_VFP)
&& ( register_operand (operands[0], SImode)
@@ -276,16 +276,19 @@
"@
mov%?\\t%0, %1
mov%?\\t%0, %1
+ mov%?\\t%0, %1
mvn%?\\t%0, #%B1
movw%?\\t%0, %1
ldr%?\\t%0, %1
ldr%?\\t%0, %1
str%?\\t%1, %0
str%?\\t%1, %0"
- [(set_attr "type" "*,*,simple_alu_imm,*,load1,load1,store1,store1")
+ [(set_attr "type" "*,simple_alu_imm,simple_alu_imm,simple_alu_imm,*,load1,load1,store1,store1")
+ (set_attr "length" "2,4,2,4,4,4,4,4,4")
(set_attr "predicable" "yes")
- (set_attr "pool_range" "*,*,*,*,1018,4094,*,*")
- (set_attr "neg_pool_range" "*,*,*,*,0,0,*,*")]
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no")
+ (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*")
+ (set_attr "neg_pool_range" "*,*,*,*,*,0,0,*,*")]
)
(define_insn "tls_load_dot_plus_four"
@@ -390,26 +393,32 @@
)
(define_insn_and_split "*thumb2_movsicc_insn"
- [(set (match_operand:SI 0 "s_register_operand" "=r,r,r,r,r,r,r,r")
+ [(set (match_operand:SI 0 "s_register_operand" "=l,l,r,r,r,r,r,r,r,r,r,l")
(if_then_else:SI
(match_operator 3 "arm_comparison_operator"
[(match_operand 4 "cc_register" "") (const_int 0)])
- (match_operand:SI 1 "arm_not_operand" "0,0,rI,K,rI,rI,K,K")
- (match_operand:SI 2 "arm_not_operand" "rI,K,0,0,rI,K,rI,K")))]
+ (match_operand:SI 1 "arm_not_operand" "0 ,Py,0 ,0,rI,K,rI,rI,K ,K,r,lPy")
+ (match_operand:SI 2 "arm_not_operand" "Py,0 ,rI,K,0 ,0,rI,K ,rI,K,r,lPy")))]
"TARGET_THUMB2"
"@
it\\t%D3\;mov%D3\\t%0, %2
+ it\\t%d3\;mov%d3\\t%0, %1
+ it\\t%D3\;mov%D3\\t%0, %2
it\\t%D3\;mvn%D3\\t%0, #%B2
it\\t%d3\;mov%d3\\t%0, %1
it\\t%d3\;mvn%d3\\t%0, #%B1
#
#
#
+ #
+ #
#"
- ; alt 4: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
- ; alt 5: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
- ; alt 6: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
- ; alt 7: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2"
+ ; alt 6: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
+ ; alt 7: ite\\t%d3\;mov%d3\\t%0, %1\;mvn%D3\\t%0, #%B2
+ ; alt 8: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mov%D3\\t%0, %2
+ ; alt 9: ite\\t%d3\;mvn%d3\\t%0, #%B1\;mvn%D3\\t%0, #%B2
+ ; alt 10: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2
+ ; alt 11: ite\\t%d3\;mov%d3\\t%0, %1\;mov%D3\\t%0, %2"
"&& reload_completed"
[(const_int 0)]
{
@@ -440,7 +449,8 @@
operands[2])));
DONE;
}
- [(set_attr "length" "6,6,6,6,10,10,10,10")
+ [(set_attr "length" "4,4,6,6,6,6,10,10,10,10,6,6")
+ (set_attr "enabled_for_depr_it" "yes,yes,no,no,no,no,no,no,no,no,yes,yes")
(set_attr "conds" "use")]
)
@@ -491,29 +501,30 @@
(define_insn_and_split "*thumb2_and_scc"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
(and:SI (match_operator:SI 1 "arm_comparison_operator"
[(match_operand 2 "cc_register" "") (const_int 0)])
(match_operand:SI 3 "s_register_operand" "r")))]
"TARGET_THUMB2"
- "#" ; "ite\\t%D1\;mov%D1\\t%0, #0\;and%d1\\t%0, %3, #1"
+ "#" ; "and\\t%0, %3, #1\;it\\t%D1\;mov%D1\\t%0, #0"
"&& reload_completed"
- [(cond_exec (match_dup 5) (set (match_dup 0) (const_int 0)))
- (cond_exec (match_dup 4) (set (match_dup 0)
- (and:SI (match_dup 3) (const_int 1))))]
+ [(set (match_dup 0)
+ (and:SI (match_dup 3) (const_int 1)))
+ (cond_exec (match_dup 4) (set (match_dup 0) (const_int 0)))]
{
enum machine_mode mode = GET_MODE (operands[2]);
enum rtx_code rc = GET_CODE (operands[1]);
- operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
if (mode == CCFPmode || mode == CCFPEmode)
rc = reverse_condition_maybe_unordered (rc);
else
rc = reverse_condition (rc);
- operands[5] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
+ operands[4] = gen_rtx_fmt_ee (rc, VOIDmode, operands[2], const0_rtx);
}
[(set_attr "conds" "use")
- (set_attr "length" "10")]
+ (set (attr "length") (if_then_else (match_test "arm_restrict_it")
+ (const_int 8)
+ (const_int 10)))]
)
(define_insn_and_split "*thumb2_ior_scc"
@@ -649,7 +660,7 @@
)
(define_insn_and_split "*thumb2_negscc"
- [(set (match_operand:SI 0 "s_register_operand" "=r")
+ [(set (match_operand:SI 0 "s_register_operand" "=Ts")
(neg:SI (match_operator 3 "arm_comparison_operator"
[(match_operand:SI 1 "s_register_operand" "r")
(match_operand:SI 2 "arm_rhs_operand" "rI")])))
@@ -671,7 +682,7 @@
GEN_INT (31))));
DONE;
}
- else if (GET_CODE (operands[3]) == NE)
+ else if (GET_CODE (operands[3]) == NE && !arm_restrict_it)
{
/* Emit subs\\t%0, %1, %2\;it\\tne\;mvnne\\t%0, #0 */
if (CONST_INT_P (operands[2]))
@@ -691,7 +702,7 @@
}
else
{
- /* Emit: cmp\\t%1, %2\;ite\\t%D3\;mov%D3\\t%0, #0\;mvn%d3\\t%0, #0 */
+ /* Emit: cmp\\t%1, %2\;mvn\\t%0, #0\;it\\t%D3\;mov%D3\\t%0, #0\;*/
enum rtx_code rc = reverse_condition (GET_CODE (operands[3]));
enum machine_mode mode = SELECT_CC_MODE (rc, operands[1], operands[2]);
rtx tmp1 = gen_rtx_REG (mode, CC_REGNUM);
@@ -699,21 +710,15 @@
emit_insn (gen_rtx_SET (VOIDmode,
cc_reg,
gen_rtx_COMPARE (CCmode, operands[1], operands[2])));
+
+ emit_insn (gen_rtx_SET (VOIDmode, operands[0], GEN_INT (~0)));
+
emit_insn (gen_rtx_COND_EXEC (VOIDmode,
gen_rtx_fmt_ee (rc,
VOIDmode,
tmp1,
const0_rtx),
gen_rtx_SET (VOIDmode, operands[0], const0_rtx)));
- rc = GET_CODE (operands[3]);
- emit_insn (gen_rtx_COND_EXEC (VOIDmode,
- gen_rtx_fmt_ee (rc,
- VOIDmode,
- tmp1,
- const0_rtx),
- gen_rtx_SET (VOIDmode,
- operands[0],
- GEN_INT (~0))));
DONE;
}
FAIL;
@@ -1063,7 +1068,7 @@
"mul%!\\t%0, %2, %0"
[(set_attr "predicable" "yes")
(set_attr "length" "2")
- (set_attr "insn" "muls")])
+ (set_attr "type" "muls")])
(define_insn "*thumb2_mulsi_short_compare0"
[(set (reg:CC_NOOV CC_REGNUM)
@@ -1076,7 +1081,7 @@
"TARGET_THUMB2 && optimize_size"
"muls\\t%0, %2, %0"
[(set_attr "length" "2")
- (set_attr "insn" "muls")])
+ (set_attr "type" "muls")])
(define_insn "*thumb2_mulsi_short_compare0_scratch"
[(set (reg:CC_NOOV CC_REGNUM)
@@ -1088,7 +1093,7 @@
"TARGET_THUMB2 && optimize_size"
"muls\\t%0, %2, %0"
[(set_attr "length" "2")
- (set_attr "insn" "muls")])
+ (set_attr "type" "muls")])
(define_insn "*thumb2_cbz"
[(set (pc) (if_then_else
diff --git a/gcc/config/arm/vfp.md b/gcc/config/arm/vfp.md
index 1930cddb835..9ac887e9b19 100644
--- a/gcc/config/arm/vfp.md
+++ b/gcc/config/arm/vfp.md
@@ -18,31 +18,6 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>. */
-;; The VFP "type" attributes differ from those used in the FPA model.
-;; fcpys Single precision cpy.
-;; ffariths Single precision abs, neg.
-;; ffarithd Double precision abs, neg, cpy.
-;; fadds Single precision add/sub.
-;; faddd Double precision add/sub.
-;; fconsts Single precision load immediate.
-;; fconstd Double precision load immediate.
-;; fcmps Single precision comparison.
-;; fcmpd Double precision comparison.
-;; fmuls Single precision multiply.
-;; fmuld Double precision multiply.
-;; fmacs Single precision multiply-accumulate.
-;; fmacd Double precision multiply-accumulate.
-;; ffmas Single precision fused multiply-accumulate.
-;; ffmad Double precision fused multiply-accumulate.
-;; fdivs Single precision sqrt or division.
-;; fdivd Double precision sqrt or division.
-;; f_flag fmstat operation
-;; f_load[sd] Floating point load from memory.
-;; f_store[sd] Floating point store to memory.
-;; f_2_r Transfer vfp to arm reg.
-;; r_2_f Transfer arm to vfp reg.
-;; f_cvt Convert floating<->integral
-
;; SImode moves
;; ??? For now do not allow loading constants into vfp regs. This causes
;; problems because small constants get converted into adds.
@@ -87,45 +62,52 @@
;; See thumb2.md:thumb2_movsi_insn for an explanation of the split
;; high/low register alternatives for loads and stores here.
+;; The l/Py alternative should come after r/I to ensure that the short variant
+;; is chosen with length 2 when the instruction is predicated for
+;; arm_restrict_it.
(define_insn "*thumb2_movsi_vfp"
- [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
- (match_operand:SI 1 "general_operand" "rk, I,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
+ [(set (match_operand:SI 0 "nonimmediate_operand" "=rk,r,l,r,r, l,*hk,m, *m,*t, r,*t,*t, *Uv")
+ (match_operand:SI 1 "general_operand" "rk,I,Py,K,j,mi,*mi,l,*hk, r,*t,*t,*Uvi,*t"))]
"TARGET_THUMB2 && TARGET_VFP && TARGET_HARD_FLOAT
&& ( s_register_operand (operands[0], SImode)
|| s_register_operand (operands[1], SImode))"
"*
switch (which_alternative)
{
- case 0: case 1:
- return \"mov%?\\t%0, %1\";
+ case 0:
+ case 1:
case 2:
- return \"mvn%?\\t%0, #%B1\";
+ return \"mov%?\\t%0, %1\";
case 3:
- return \"movw%?\\t%0, %1\";
+ return \"mvn%?\\t%0, #%B1\";
case 4:
+ return \"movw%?\\t%0, %1\";
case 5:
- return \"ldr%?\\t%0, %1\";
case 6:
+ return \"ldr%?\\t%0, %1\";
case 7:
- return \"str%?\\t%1, %0\";
case 8:
- return \"fmsr%?\\t%0, %1\\t%@ int\";
+ return \"str%?\\t%1, %0\";
case 9:
- return \"fmrs%?\\t%0, %1\\t%@ int\";
+ return \"fmsr%?\\t%0, %1\\t%@ int\";
case 10:
+ return \"fmrs%?\\t%0, %1\\t%@ int\";
+ case 11:
return \"fcpys%?\\t%0, %1\\t%@ int\";
- case 11: case 12:
+ case 12: case 13:
return output_move_vfp (operands);
default:
gcc_unreachable ();
}
"
[(set_attr "predicable" "yes")
- (set_attr "type" "*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
- (set_attr "neon_type" "*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
- (set_attr "insn" "mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
- (set_attr "pool_range" "*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
- (set_attr "neg_pool_range" "*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
+ (set_attr "predicable_short_it" "yes,no,yes,no,no,no,no,no,no,no,no,no,no,no")
+ (set_attr "type" "*,*,*,*,*,load1,load1,store1,store1,r_2_f,f_2_r,fcpys,f_loads,f_stores")
+ (set_attr "length" "2,4,2,4,4,4,4,4,4,4,4,4,4,4")
+ (set_attr "neon_type" "*,*,*,*,*,*,*,*,*,neon_mcr,neon_mrc,neon_vmov,*,*")
+ (set_attr "insn" "mov,mov,mov,mvn,mov,*,*,*,*,*,*,*,*,*")
+ (set_attr "pool_range" "*,*,*,*,*,1018,4094,*,*,*,*,*,1018,*")
+ (set_attr "neg_pool_range" "*,*,*,*,*, 0, 0,*,*,*,*,*,1008,*")]
)
@@ -412,6 +394,7 @@
}
"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type"
"r_2_f,f_2_r,fconsts,f_loads,f_stores,load1,store1,fcpys,*")
(set_attr "neon_type" "neon_mcr,neon_mrc,*,*,*,*,*,neon_vmov,*")
@@ -420,7 +403,6 @@
(set_attr "neg_pool_range" "*,*,*,1008,*,0,*,*,*")]
)
-
;; DFmode moves
(define_insn "*movdf_vfp"
@@ -550,7 +532,7 @@
[(match_operand 4 "cc_register" "") (const_int 0)])
(match_operand:SF 1 "s_register_operand" "0,t,t,0,?r,?r,0,t,t")
(match_operand:SF 2 "s_register_operand" "t,0,t,?r,0,?r,t,0,t")))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP"
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP && !arm_restrict_it"
"@
it\\t%D3\;fcpys%D3\\t%0, %2
it\\t%d3\;fcpys%d3\\t%0, %1
@@ -598,7 +580,7 @@
[(match_operand 4 "cc_register" "") (const_int 0)])
(match_operand:DF 1 "s_register_operand" "0,w,w,0,?r,?r,0,w,w")
(match_operand:DF 2 "s_register_operand" "w,0,w,?r,0,?r,w,0,w")))]
- "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
+ "TARGET_THUMB2 && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE && !arm_restrict_it"
"@
it\\t%D3\;fcpyd%D3\\t%P0, %P2
it\\t%d3\;fcpyd%d3\\t%P0, %P1
@@ -624,6 +606,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fabss%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffariths")]
)
@@ -633,6 +616,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fabsd%?\\t%P0, %P1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffarithd")]
)
@@ -644,6 +628,7 @@
fnegs%?\\t%0, %1
eor%?\\t%0, %1, #-2147483648"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffariths")]
)
@@ -689,6 +674,7 @@
}
"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "length" "4,4,8")
(set_attr "type" "ffarithd")]
)
@@ -703,6 +689,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fadds%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fadds")]
)
@@ -713,6 +700,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"faddd%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "faddd")]
)
@@ -724,6 +712,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsubs%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fadds")]
)
@@ -734,6 +723,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fsubd%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "faddd")]
)
@@ -747,6 +737,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fdivs%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fdivs")]
)
@@ -757,6 +748,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fdivd%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fdivd")]
)
@@ -770,6 +762,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmuls%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmuls")]
)
@@ -780,6 +773,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fmuld%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmuld")]
)
@@ -790,6 +784,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmuls%?\\t%0, %1, %2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmuls")]
)
@@ -800,6 +795,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fnmuld%?\\t%P0, %P1, %P2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmuld")]
)
@@ -815,6 +811,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmacs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacs")]
)
@@ -826,6 +823,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fmacd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacd")]
)
@@ -838,6 +836,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fmscs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacs")]
)
@@ -849,6 +848,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fmscd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacd")]
)
@@ -861,6 +861,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmacs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacs")]
)
@@ -872,6 +873,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fnmacd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacd")]
)
@@ -886,6 +888,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fnmscs%?\\t%0, %2, %3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacs")]
)
@@ -898,6 +901,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fnmscd%?\\t%P0, %P2, %P3"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fmacd")]
)
@@ -911,6 +915,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
"vfma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffma<vfp_type>")]
)
@@ -923,6 +928,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
"vfms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffma<vfp_type>")]
)
@@ -934,6 +940,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
"vfnms%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffma<vfp_type>")]
)
@@ -946,6 +953,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA"
"vfnma%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "ffma<vfp_type>")]
)
@@ -958,6 +966,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fcvtds%?\\t%P0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -967,6 +976,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fcvtsd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -976,6 +986,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
"vcvtb%?.f32.f16\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -985,6 +996,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FP16"
"vcvtb%?.f16.f32\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -994,6 +1006,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftosizs%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1003,6 +1016,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"ftosizd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1013,6 +1027,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"ftouizs%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1022,6 +1037,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"ftouizd%?\\t%0, %P1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1032,6 +1048,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsitos%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1041,6 +1058,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fsitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1051,6 +1069,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fuitos%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1060,6 +1079,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fuitod%?\\t%P0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_cvt")]
)
@@ -1072,6 +1092,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP"
"fsqrts%?\\t%0, %1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fdivs")]
)
@@ -1081,6 +1102,7 @@
"TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP_DOUBLE"
"fsqrtd%?\\t%P0, %P1"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fdivd")]
)
@@ -1168,6 +1190,7 @@
fcmps%?\\t%0, %1
fcmpzs%?\\t%0"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fcmps")]
)
@@ -1180,6 +1203,7 @@
fcmpes%?\\t%0, %1
fcmpezs%?\\t%0"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fcmps")]
)
@@ -1192,6 +1216,7 @@
fcmpd%?\\t%P0, %P1
fcmpzd%?\\t%P0"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fcmpd")]
)
@@ -1204,6 +1229,7 @@
fcmped%?\\t%P0, %P1
fcmpezd%?\\t%P0"
[(set_attr "predicable" "yes")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "fcmpd")]
)
@@ -1263,6 +1289,7 @@
"TARGET_HARD_FLOAT && TARGET_FPU_ARMV8 <vfp_double_cond>"
"vrint<vrint_variant>%?.<V_if_elem>\\t%<V_reg>0, %<V_reg>1"
[(set_attr "predicable" "<vrint_predicable>")
+ (set_attr "predicable_short_it" "no")
(set_attr "type" "f_rint<vfp_type>")]
)
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index ce331cbe363..c30a9718e76 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -134,7 +134,7 @@ extern c6x_cpu_t c6x_arch;
Really only externally visible arrays must be aligned this way, as
only those are directly visible from another compilation unit. But
we don't have that information available here. */
-#define DATA_ALIGNMENT(TYPE, ALIGN) \
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
(((ALIGN) < BITS_PER_UNIT * 8 && TREE_CODE (TYPE) == ARRAY_TYPE) \
? BITS_PER_UNIT * 8 : (ALIGN))
diff --git a/gcc/config/i386/ammintrin.h b/gcc/config/i386/ammintrin.h
index 311292c7a44..297b98dd0d8 100644
--- a/gcc/config/i386/ammintrin.h
+++ b/gcc/config/i386/ammintrin.h
@@ -27,13 +27,15 @@
#ifndef _AMMINTRIN_H_INCLUDED
#define _AMMINTRIN_H_INCLUDED
-#ifndef __SSE4A__
-# error "SSE4A instruction set not enabled"
-#else
-
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
+#ifndef __SSE4A__
+#pragma GCC push_options
+#pragma GCC target("sse4a")
+#define __DISABLE_SSE4A__
+#endif /* __SSE4A__ */
+
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_stream_sd (double * __P, __m128d __Y)
{
@@ -83,6 +85,9 @@ _mm_inserti_si64(__m128i __X, __m128i __Y, unsigned const int __I, unsigned cons
(unsigned int)(I), (unsigned int)(L)))
#endif
-#endif /* __SSE4A__ */
+#ifdef __DISABLE_SSE4A__
+#undef __DISABLE_SSE4A__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4A__ */
#endif /* _AMMINTRIN_H_INCLUDED */
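The hunks above replace the hard "#error" guard with a push_options/target("sse4a") window, so the header can always be included and the ISA enabled per function. A minimal usage sketch, not part of the patch (the function name stream_store is hypothetical):

#include <ammintrin.h>

/* Compiles even without -msse4a on the command line: the header no longer
   errors out, and the target attribute enables SSE4A for this function.  */
__attribute__((__target__("sse4a")))
void
stream_store (double *dst, __m128d v)
{
  _mm_stream_sd (dst, v);	/* movntsd */
}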
diff --git a/gcc/config/i386/avx2intrin.h b/gcc/config/i386/avx2intrin.h
index 1537bf5add0..4030dfe2bc2 100644
--- a/gcc/config/i386/avx2intrin.h
+++ b/gcc/config/i386/avx2intrin.h
@@ -25,6 +25,15 @@
# error "Never use <avx2intrin.h> directly; include <immintrin.h> instead."
#endif
+#ifndef _AVX2INTRIN_H_INCLUDED
+#define _AVX2INTRIN_H_INCLUDED
+
+#ifndef __AVX2__
+#pragma GCC push_options
+#pragma GCC target("avx2")
+#define __DISABLE_AVX2__
+#endif /* __AVX2__ */
+
/* Sum absolute 8-bit integer difference of adjacent groups of 4
byte integers in the first 2 operands. Starting offsets within
operands are determined by the 3rd mask operand. */
@@ -1871,3 +1880,10 @@ _mm256_mask_i64gather_epi32 (__m128i src, int const *base,
(__v4si)(__m128i)MASK, \
(int)SCALE)
#endif /* __OPTIMIZE__ */
+
+#ifdef __DISABLE_AVX2__
+#undef __DISABLE_AVX2__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX2__ */
+
+#endif /* _AVX2INTRIN_H_INCLUDED */
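With the same guard pattern applied to <avx2intrin.h>, the intrinsics can also be enabled for a region of a file through the target pragma rather than a per-function attribute. A minimal sketch under that assumption (add8 is a hypothetical name, and it assumes the companion <immintrin.h> change in this series makes the include unconditional):

#include <immintrin.h>

#pragma GCC push_options
#pragma GCC target("avx2")
/* AVX2 is in effect only between push_options and pop_options.  */
static __m256i
add8 (__m256i a, __m256i b)
{
  return _mm256_add_epi32 (a, b);	/* vpaddd */
}
#pragma GCC pop_options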
diff --git a/gcc/config/i386/avxintrin.h b/gcc/config/i386/avxintrin.h
index b75de451af9..7f2109a7299 100644
--- a/gcc/config/i386/avxintrin.h
+++ b/gcc/config/i386/avxintrin.h
@@ -28,6 +28,15 @@
# error "Never use <avxintrin.h> directly; include <immintrin.h> instead."
#endif
+#ifndef _AVXINTRIN_H_INCLUDED
+#define _AVXINTRIN_H_INCLUDED
+
+#ifndef __AVX__
+#pragma GCC push_options
+#pragma GCC target("avx")
+#define __DISABLE_AVX__
+#endif /* __AVX__ */
+
/* Internal data types for implementing the intrinsics. */
typedef double __v4df __attribute__ ((__vector_size__ (32)));
typedef float __v8sf __attribute__ ((__vector_size__ (32)));
@@ -1424,3 +1433,10 @@ _mm256_castsi128_si256 (__m128i __A)
{
return (__m256i) __builtin_ia32_si256_si ((__v4si)__A);
}
+
+#ifdef __DISABLE_AVX__
+#undef __DISABLE_AVX__
+#pragma GCC pop_options
+#endif /* __DISABLE_AVX__ */
+
+#endif /* _AVXINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/bmi2intrin.h b/gcc/config/i386/bmi2intrin.h
index 929ea20b970..0c6cb9616c8 100644
--- a/gcc/config/i386/bmi2intrin.h
+++ b/gcc/config/i386/bmi2intrin.h
@@ -25,13 +25,15 @@
# error "Never use <bmi2intrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI2__
-# error "BMI2 instruction set not enabled"
-#endif /* __BMI2__ */
-
#ifndef _BMI2INTRIN_H_INCLUDED
#define _BMI2INTRIN_H_INCLUDED
+#ifndef __BMI2__
+#pragma GCC push_options
+#pragma GCC target("bmi2")
+#define __DISABLE_BMI2__
+#endif /* __BMI2__ */
+
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_bzhi_u32 (unsigned int __X, unsigned int __Y)
@@ -99,4 +101,9 @@ _mulx_u32 (unsigned int __X, unsigned int __Y, unsigned int *__P)
#endif /* !__x86_64__ */
+#ifdef __DISABLE_BMI2__
+#undef __DISABLE_BMI2__
+#pragma GCC pop_options
+#endif /* __DISABLE_BMI2__ */
+
#endif /* _BMI2INTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/bmiintrin.h b/gcc/config/i386/bmiintrin.h
index 0087f5c06e0..281ebaaf4f2 100644
--- a/gcc/config/i386/bmiintrin.h
+++ b/gcc/config/i386/bmiintrin.h
@@ -25,13 +25,15 @@
# error "Never use <bmiintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __BMI__
-# error "BMI instruction set not enabled"
-#endif /* __BMI__ */
-
#ifndef _BMIINTRIN_H_INCLUDED
#define _BMIINTRIN_H_INCLUDED
+#ifndef __BMI__
+#pragma GCC push_options
+#pragma GCC target("bmi")
+#define __DISABLE_BMI__
+#endif /* __BMI__ */
+
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__tzcnt_u16 (unsigned short __X)
{
@@ -52,6 +54,12 @@ __bextr_u32 (unsigned int __X, unsigned int __Y)
}
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u32 (unsigned int __X, unsigned int __Y, unsigned __Z)
+{
+ return __builtin_ia32_bextr_u32 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
+extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u32 (unsigned int __X)
{
return __X & -__X;
@@ -91,6 +99,12 @@ __bextr_u64 (unsigned long long __X, unsigned long long __Y)
}
extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+_bextr_u64 (unsigned long long __X, unsigned int __Y, unsigned int __Z)
+{
+ return __builtin_ia32_bextr_u64 (__X, ((__Y & 0xff) | ((__Z & 0xff) << 8)));
+}
+
+extern __inline unsigned long long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__blsi_u64 (unsigned long long __X)
{
return __X & -__X;
@@ -116,4 +130,9 @@ __tzcnt_u64 (unsigned long long __X)
#endif /* __x86_64__ */
+#ifdef __DISABLE_BMI__
+#undef __DISABLE_BMI__
+#pragma GCC pop_options
+#endif /* __DISABLE_BMI__ */
+
#endif /* _BMIINTRIN_H_INCLUDED */
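Besides the guard change, the hunk above adds _bextr_u32/_bextr_u64 wrappers that take the start bit and field length as separate arguments and pack them into the control operand of the underlying builtin. A small sketch of the intended use (extract_field is a hypothetical name, and it assumes the companion <x86intrin.h> change in this series makes the include unconditional):

#include <x86intrin.h>

__attribute__((__target__("bmi")))
unsigned int
extract_field (unsigned int word)
{
  /* Bits [12:8] of WORD, i.e. start = 8, length = 5; equivalent to
     __bextr_u32 (word, (8 & 0xff) | ((5 & 0xff) << 8)).  */
  return _bextr_u32 (word, 8, 5);
}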
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index e28f098b03a..249c4cd1d53 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -674,8 +674,14 @@ const char *host_detect_local_cpu (int argc, const char **argv)
/* Assume Sandy Bridge. */
cpu = "corei7-avx";
else if (has_sse4_2)
- /* Assume Core i7. */
- cpu = "corei7";
+ {
+ if (has_movbe)
+ /* Assume SLM. */
+ cpu = "slm";
+ else
+ /* Assume Core i7. */
+ cpu = "corei7";
+ }
else if (has_ssse3)
{
if (has_movbe)
diff --git a/gcc/config/i386/emmintrin.h b/gcc/config/i386/emmintrin.h
index cf404a13536..c30f05657d6 100644
--- a/gcc/config/i386/emmintrin.h
+++ b/gcc/config/i386/emmintrin.h
@@ -27,13 +27,15 @@
#ifndef _EMMINTRIN_H_INCLUDED
#define _EMMINTRIN_H_INCLUDED
-#ifndef __SSE2__
-# error "SSE2 instruction set not enabled"
-#else
-
/* We need definitions from the SSE header files*/
#include <xmmintrin.h>
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
/* SSE2 */
typedef double __v2df __attribute__ ((__vector_size__ (16)));
typedef long long __v2di __attribute__ ((__vector_size__ (16)));
@@ -1515,6 +1517,9 @@ _mm_castsi128_pd(__m128i __A)
return (__m128d) __A;
}
-#endif /* __SSE2__ */
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
#endif /* _EMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/f16cintrin.h b/gcc/config/i386/f16cintrin.h
index 88903c16231..76f35fa1eac 100644
--- a/gcc/config/i386/f16cintrin.h
+++ b/gcc/config/i386/f16cintrin.h
@@ -25,13 +25,15 @@
# error "Never use <f16intrin.h> directly; include <x86intrin.h> or <immintrin.h> instead."
#endif
-#ifndef __F16C__
-# error "F16C instruction set not enabled"
-#else
-
#ifndef _F16CINTRIN_H_INCLUDED
#define _F16CINTRIN_H_INCLUDED
+#ifndef __F16C__
+#pragma GCC push_options
+#pragma GCC target("f16c")
+#define __DISABLE_F16C__
+#endif /* __F16C__ */
+
extern __inline float __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_cvtsh_ss (unsigned short __S)
{
@@ -88,5 +90,9 @@ _mm256_cvtps_ph (__m256 __A, const int __I)
((__m128i) __builtin_ia32_vcvtps2ph256 ((__v8sf)(__m256) A, (int) (I)))
#endif /* __OPTIMIZE */
+#ifdef __DISABLE_F16C__
+#undef __DISABLE_F16C__
+#pragma GCC pop_options
+#endif /* __DISABLE_F16C__ */
+
#endif /* _F16CINTRIN_H_INCLUDED */
-#endif /* __F16C__ */
diff --git a/gcc/config/i386/fma4intrin.h b/gcc/config/i386/fma4intrin.h
index 00ba7813123..e615f3e7ba0 100644
--- a/gcc/config/i386/fma4intrin.h
+++ b/gcc/config/i386/fma4intrin.h
@@ -28,13 +28,15 @@
#ifndef _FMA4INTRIN_H_INCLUDED
#define _FMA4INTRIN_H_INCLUDED
-#ifndef __FMA4__
-# error "FMA4 instruction set not enabled"
-#else
-
/* We need definitions from the SSE4A, SSE3, SSE2 and SSE header files. */
#include <ammintrin.h>
+#ifndef __FMA4__
+#pragma GCC push_options
+#pragma GCC target("fma4")
+#define __DISABLE_FMA4__
+#endif /* __FMA4__ */
+
/* 128b Floating point multiply/add type instructions. */
extern __inline __m128 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_macc_ps (__m128 __A, __m128 __B, __m128 __C)
@@ -231,6 +233,9 @@ _mm256_msubadd_pd (__m256d __A, __m256d __B, __m256d __C)
return (__m256d) __builtin_ia32_vfmaddsubpd256 ((__v4df)__A, (__v4df)__B, -(__v4df)__C);
}
-#endif
+#ifdef __DISABLE_FMA4__
+#undef __DISABLE_FMA4__
+#pragma GCC pop_options
+#endif /* __DISABLE_FMA4__ */
#endif
diff --git a/gcc/config/i386/fmaintrin.h b/gcc/config/i386/fmaintrin.h
index 6ede84b18d4..97de93fd146 100644
--- a/gcc/config/i386/fmaintrin.h
+++ b/gcc/config/i386/fmaintrin.h
@@ -29,8 +29,10 @@
#define _FMAINTRIN_H_INCLUDED
#ifndef __FMA__
-# error "FMA instruction set not enabled"
-#else
+#pragma GCC push_options
+#pragma GCC target("fma")
+#define __DISABLE_FMA__
+#endif /* __FMA__ */
extern __inline __m128d
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -292,6 +294,9 @@ _mm256_fmsubadd_ps (__m256 __A, __m256 __B, __m256 __C)
-(__v8sf)__C);
}
-#endif
+#ifdef __DISABLE_FMA__
+#undef __DISABLE_FMA__
+#pragma GCC pop_options
+#endif /* __DISABLE_FMA__ */
#endif
diff --git a/gcc/config/i386/fxsrintrin.h b/gcc/config/i386/fxsrintrin.h
index 9b63222c835..41d4085b010 100644
--- a/gcc/config/i386/fxsrintrin.h
+++ b/gcc/config/i386/fxsrintrin.h
@@ -28,6 +28,12 @@
#ifndef _FXSRINTRIN_H_INCLUDED
#define _FXSRINTRIN_H_INCLUDED
+#ifndef __FXSR__
+#pragma GCC push_options
+#pragma GCC target("fxsr")
+#define __DISABLE_FXSR__
+#endif /* __FXSR__ */
+
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_fxsave (void *__P)
@@ -58,4 +64,10 @@ _fxrstor64 (void *__P)
}
#endif
+#ifdef __DISABLE_FXSR__
+#undef __DISABLE_FXSR__
+#pragma GCC pop_options
+#endif /* __DISABLE_FXSR__ */
+
+
#endif /* _FXSRINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/gnu.h b/gcc/config/i386/gnu.h
index 35063e68b1e..4a91c843685 100644
--- a/gcc/config/i386/gnu.h
+++ b/gcc/config/i386/gnu.h
@@ -36,6 +36,12 @@ along with GCC. If not, see <http://www.gnu.org/licenses/>.
#endif
#ifdef TARGET_LIBC_PROVIDES_SSP
+
+/* Not supported yet. */
+# undef TARGET_THREAD_SSP_OFFSET
+
/* Not supported yet. */
-#undef TARGET_THREAD_SSP_OFFSET
+# undef TARGET_CAN_SPLIT_STACK
+# undef TARGET_THREAD_SPLIT_STACK_OFFSET
+
#endif
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 51fec844bdf..31dd28a94cb 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -149,6 +149,10 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
def_or_undef (parse_in, "__atom");
def_or_undef (parse_in, "__atom__");
break;
+ case PROCESSOR_SLM:
+ def_or_undef (parse_in, "__slm");
+ def_or_undef (parse_in, "__slm__");
+ break;
/* use PROCESSOR_max to not set/unset the arch macro. */
case PROCESSOR_max:
break;
@@ -241,6 +245,9 @@ ix86_target_macros_internal (HOST_WIDE_INT isa_flag,
case PROCESSOR_ATOM:
def_or_undef (parse_in, "__tune_atom__");
break;
+ case PROCESSOR_SLM:
+ def_or_undef (parse_in, "__tune_slm__");
+ break;
case PROCESSOR_GENERIC32:
case PROCESSOR_GENERIC64:
break;
@@ -369,20 +376,23 @@ ix86_pragma_target_parse (tree args, tree pop_target)
if (! args)
{
- cur_tree = ((pop_target)
- ? pop_target
- : target_option_default_node);
+ cur_tree = (pop_target ? pop_target : target_option_default_node);
cl_target_option_restore (&global_options,
TREE_TARGET_OPTION (cur_tree));
}
else
{
cur_tree = ix86_valid_target_attribute_tree (args);
- if (!cur_tree)
- return false;
+ if (!cur_tree || cur_tree == error_mark_node)
+ {
+ cl_target_option_restore (&global_options,
+ TREE_TARGET_OPTION (prev_tree));
+ return false;
+ }
}
target_option_current_node = cur_tree;
+ ix86_reset_previous_fndecl ();
/* Figure out the previous/current isa, arch, tune and the differences. */
prev_opt = TREE_TARGET_OPTION (prev_tree);
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index ef4dc761d5a..09667893910 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -40,6 +40,8 @@ extern void ix86_output_addr_diff_elt (FILE *, int, int);
extern enum calling_abi ix86_cfun_abi (void);
extern enum calling_abi ix86_function_type_abi (const_tree);
+extern void ix86_reset_previous_fndecl (void);
+
#ifdef RTX_CODE
extern int standard_80387_constant_p (rtx);
extern const char *standard_80387_constant_opcode (rtx);
@@ -207,7 +209,7 @@ extern void init_cumulative_args (CUMULATIVE_ARGS *, tree, rtx, tree, int);
#endif /* RTX_CODE */
#ifdef TREE_CODE
-extern int ix86_data_alignment (tree, int);
+extern int ix86_data_alignment (tree, int, bool);
extern unsigned int ix86_local_alignment (tree, enum machine_mode,
unsigned int);
extern unsigned int ix86_minimum_alignment (tree, enum machine_mode,
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index 3470fef77b2..2a65fc2a6de 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -1482,6 +1482,79 @@ struct processor_costs atom_cost = {
1, /* cond_not_taken_branch_cost. */
};
+static const
+struct processor_costs slm_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ {{libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
+ {{libcall, {{8, loop, false}, {15, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}},
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
/* Generic64 should produce code tuned for Nocona and K8. */
static const
struct processor_costs generic64_cost = {
@@ -1735,6 +1808,7 @@ const struct processor_costs *ix86_cost = &pentium_cost;
#define m_HASWELL (1<<PROCESSOR_HASWELL)
#define m_CORE_ALL (m_CORE2 | m_COREI7 | m_HASWELL)
#define m_ATOM (1<<PROCESSOR_ATOM)
+#define m_SLM (1<<PROCESSOR_SLM)
#define m_GEODE (1<<PROCESSOR_GEODE)
#define m_K6 (1<<PROCESSOR_K6)
@@ -1778,7 +1852,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_486 | m_PENT,
/* X86_TUNE_UNROLL_STRLEN */
- m_486 | m_PENT | m_PPRO | m_ATOM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
+ m_486 | m_PENT | m_PPRO | m_ATOM | m_SLM | m_CORE_ALL | m_K6 | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
on simulation result. But after P4 was made, no performance benefit
@@ -1790,11 +1864,11 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
~m_386,
/* X86_TUNE_USE_SAHF */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_BDVER | m_BTVER | m_GENERIC,
/* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
partial dependencies. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
register stalls on Generic32 compilation setting as well. However
@@ -1817,13 +1891,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_386 | m_486 | m_K6_GEODE,
/* X86_TUNE_USE_SIMODE_FIOP */
- ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC),
+ ~(m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_USE_MOV0 */
m_K6,
/* X86_TUNE_USE_CLTD */
- ~(m_PENT | m_ATOM | m_K6),
+ ~(m_PENT | m_ATOM | m_SLM | m_K6),
/* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
m_PENT4,
@@ -1838,7 +1912,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
~(m_PENT | m_PPRO),
/* X86_TUNE_PROMOTE_QIMODE */
- m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_386 | m_486 | m_PENT | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_FAST_PREFIX */
~(m_386 | m_486 | m_PENT),
@@ -1879,10 +1953,10 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
for DFmode copies */
- ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GEODE | m_AMD_MULTIPLE | m_ATOM | m_GENERIC),
+ ~(m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GEODE | m_AMD_MULTIPLE | m_GENERIC),
/* X86_TUNE_PARTIAL_REG_DEPENDENCY */
- m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
conflict here in between PPro/Pentium4 based chips that thread 128bit
@@ -1893,13 +1967,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
shows that disabling this option on P4 brings over 20% SPECfp regression,
while enabling it on K8 brings roughly 2.4% regression that can be partly
masked by careful scheduling of moves. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMDFAM10 | m_BDVER | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMDFAM10 | m_BDVER | m_GENERIC,
/* X86_TUNE_SSE_UNALIGNED_LOAD_OPTIMAL */
- m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER,
+ m_COREI7 | m_AMDFAM10 | m_BDVER | m_BTVER | m_SLM,
/* X86_TUNE_SSE_UNALIGNED_STORE_OPTIMAL */
- m_COREI7 | m_BDVER,
+ m_COREI7 | m_BDVER | m_SLM,
/* X86_TUNE_SSE_PACKED_SINGLE_INSN_OPTIMAL */
m_BDVER ,
@@ -1917,7 +1991,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_PPRO | m_P4_NOCONA,
/* X86_TUNE_MEMORY_MISMATCH_STALL */
- m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_PROLOGUE_USING_MOVE */
m_PPRO | m_ATHLON_K8,
@@ -1942,16 +2016,16 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
than 4 branch instructions in the 16 byte window. */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_SCHEDULE */
- m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
+ m_PENT | m_PPRO | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_BT */
- m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC,
+ m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC,
/* X86_TUNE_USE_INCDEC */
- ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_GENERIC),
+ ~(m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_GENERIC),
/* X86_TUNE_PAD_RETURNS */
m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC,
@@ -1960,7 +2034,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
m_ATOM,
/* X86_TUNE_EXT_80387_CONSTANTS */
- m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
+ m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_K6_GEODE | m_ATHLON_K8 | m_GENERIC,
/* X86_TUNE_AVOID_VECTOR_DECODE */
m_CORE_ALL | m_K8 | m_GENERIC64,
@@ -2005,13 +2079,13 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_OPT_AGU: Optimize for Address Generation Unit. This flag
will impact LEA instruction selection. */
- m_ATOM,
+ m_ATOM | m_SLM,
/* X86_TUNE_VECTORIZE_DOUBLE: Enable double precision vector
instructions. */
~m_ATOM,
- /* X86_SOFTARE_PREFETCHING_BENEFICIAL: Enable software prefetching
+ /* X86_TUNE_SOFTWARE_PREFETCHING_BENEFICIAL: Enable software prefetching
at -O3. For the moment, the prefetching seems badly tuned for Intel
chips. */
m_K6_GEODE | m_AMD_MULTIPLE,
@@ -2026,7 +2100,7 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_REASSOC_FP_TO_PARALLEL: Try to produce parallel computations
during reassociation of fp computation. */
- m_ATOM | m_HASWELL,
+ m_ATOM | m_SLM | m_HASWELL | m_BDVER1 | m_BDVER2,
/* X86_TUNE_GENERAL_REGS_SSE_SPILL: Try to spill general regs to SSE
regs instead of memory. */
@@ -2034,7 +2108,12 @@ static unsigned int initial_ix86_tune_features[X86_TUNE_LAST] = {
/* X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE: Try to avoid memory operands for
a conditional move. */
- m_ATOM
+ m_ATOM,
+
+ /* X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS: Try to split memory operand for
+ fp converts to destination register. */
+ m_SLM
+
};
/* Feature tests against the various architecture variations. */
@@ -2060,10 +2139,10 @@ static unsigned int initial_ix86_arch_features[X86_ARCH_LAST] = {
};
static const unsigned int x86_accumulate_outgoing_args
- = m_PPRO | m_P4_NOCONA | m_ATOM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
+ = m_PPRO | m_P4_NOCONA | m_ATOM | m_SLM | m_CORE_ALL | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_arch_always_fancy_math_387
- = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_AMD_MULTIPLE | m_GENERIC;
+ = m_PENT | m_PPRO | m_P4_NOCONA | m_CORE_ALL | m_ATOM | m_SLM | m_AMD_MULTIPLE | m_GENERIC;
static const unsigned int x86_avx256_split_unaligned_load
= m_COREI7 | m_GENERIC;
@@ -2458,7 +2537,8 @@ static const struct ptt processor_target_table[PROCESSOR_max] =
{&bdver3_cost, 16, 10, 16, 7, 11},
{&btver1_cost, 16, 10, 16, 7, 11},
{&btver2_cost, 16, 10, 16, 7, 11},
- {&atom_cost, 16, 15, 16, 7, 16}
+ {&atom_cost, 16, 15, 16, 7, 16},
+ {&slm_cost, 16, 15, 16, 7, 16}
};
static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
@@ -2479,6 +2559,7 @@ static const char *const cpu_names[TARGET_CPU_DEFAULT_max] =
"corei7",
"core-avx2",
"atom",
+ "slm",
"geode",
"k6",
"k6-2",
@@ -2940,6 +3021,10 @@ ix86_option_override_internal (bool main_args_p)
{"atom", PROCESSOR_ATOM, CPU_ATOM,
PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
| PTA_SSSE3 | PTA_CX16 | PTA_MOVBE | PTA_FXSR},
+ {"slm", PROCESSOR_SLM, CPU_SLM,
+ PTA_64BIT | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
+ | PTA_SSSE3 | PTA_SSE4_1 | PTA_SSE4_2 | PTA_CX16 | PTA_MOVBE
+ | PTA_FXSR},
{"geode", PROCESSOR_GEODE, CPU_GEODE,
PTA_MMX | PTA_3DNOW | PTA_3DNOW_A | PTA_PREFETCH_SSE | PTA_PRFCHW},
{"k6", PROCESSOR_K6, CPU_K6, PTA_MMX},
@@ -4564,6 +4649,13 @@ ix86_can_inline_p (tree caller, tree callee)
/* Remember the last target of ix86_set_current_function. */
static GTY(()) tree ix86_previous_fndecl;
+/* Invalidate ix86_previous_fndecl cache. */
+void
+ix86_reset_previous_fndecl (void)
+{
+ ix86_previous_fndecl = NULL_TREE;
+}
+
/* Establish appropriate back-end context for processing the function
FNDECL. The argument might be NULL to indicate processing at top
level, outside of any function scope. */
@@ -6413,7 +6505,7 @@ construct_container (enum machine_mode mode, enum machine_mode orig_mode,
/* Likewise, error if the ABI requires us to return values in the
x87 registers and the user specified -mno-80387. */
- if (!TARGET_80387 && in_return)
+ if (!TARGET_FLOAT_RETURNS_IN_80387 && in_return)
for (i = 0; i < n; i++)
if (regclass[i] == X86_64_X87_CLASS
|| regclass[i] == X86_64_X87UP_CLASS
@@ -17312,10 +17404,24 @@ distance_agu_use (unsigned int regno0, rtx insn)
static bool
ix86_lea_outperforms (rtx insn, unsigned int regno0, unsigned int regno1,
- unsigned int regno2, int split_cost)
+ unsigned int regno2, int split_cost, bool has_scale)
{
int dist_define, dist_use;
+ /* For Silvermont, if a 2-source or 3-source LEA is used for a
+ non-destructive destination, or for the ability to use SCALE,
+ the use of LEA is justified. */
+ if (ix86_tune == PROCESSOR_SLM)
+ {
+ if (has_scale)
+ return true;
+ if (split_cost < 1)
+ return false;
+ if (regno0 == regno1 || regno0 == regno2)
+ return false;
+ return true;
+ }
+
dist_define = distance_non_agu_define (regno1, regno2, insn);
dist_use = distance_agu_use (regno0, insn);
@@ -17404,7 +17510,7 @@ ix86_avoid_lea_for_add (rtx insn, rtx operands[])
if (regno0 == regno1 || regno0 == regno2)
return false;
else
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, 1, false);
}
/* Return true if we should emit lea instruction instead of mov
@@ -17426,7 +17532,7 @@ ix86_use_lea_for_mov (rtx insn, rtx operands[])
regno0 = true_regnum (operands[0]);
regno1 = true_regnum (operands[1]);
- return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, INVALID_REGNUM, 0, false);
}
/* Return true if we need to split lea into a sequence of
@@ -17505,7 +17611,8 @@ ix86_avoid_lea_for_addr (rtx insn, rtx operands[])
split_cost -= 1;
}
- return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost);
+ return !ix86_lea_outperforms (insn, regno0, regno1, regno2, split_cost,
+ parts.scale > 1);
}
/* Emit x86 binary operand CODE in mode MODE, where the first operand
@@ -17690,7 +17797,7 @@ ix86_lea_for_add_ok (rtx insn, rtx operands[])
if (!TARGET_OPT_AGU || optimize_function_for_size_p (cfun))
return false;
- return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0);
+ return ix86_lea_outperforms (insn, regno0, regno1, regno2, 0, false);
}
/* Return true if destination reg of SET_BODY is shift count of
@@ -24199,6 +24306,7 @@ ix86_issue_rate (void)
{
case PROCESSOR_PENTIUM:
case PROCESSOR_ATOM:
+ case PROCESSOR_SLM:
case PROCESSOR_K6:
case PROCESSOR_BTVER2:
return 2;
@@ -24287,6 +24395,73 @@ ix86_agi_dependent (rtx set_insn, rtx use_insn)
return false;
}
+/* Helper function for exact_store_load_dependency.
+ Return true if addr is found in insn. */
+static bool
+exact_dependency_1 (rtx addr, rtx insn)
+{
+ enum rtx_code code;
+ const char *format_ptr;
+ int i, j;
+
+ code = GET_CODE (insn);
+ switch (code)
+ {
+ case MEM:
+ if (rtx_equal_p (addr, insn))
+ return true;
+ break;
+ case REG:
+ CASE_CONST_ANY:
+ case SYMBOL_REF:
+ case CODE_LABEL:
+ case PC:
+ case CC0:
+ case EXPR_LIST:
+ return false;
+ default:
+ break;
+ }
+
+ format_ptr = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ {
+ switch (*format_ptr++)
+ {
+ case 'e':
+ if (exact_dependency_1 (addr, XEXP (insn, i)))
+ return true;
+ break;
+ case 'E':
+ for (j = 0; j < XVECLEN (insn, i); j++)
+ if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
+ return true;
+ break;
+ }
+ }
+ return false;
+}
+
+/* Return true if there exists exact dependency for store & load, i.e.
+ the same memory address is used in them. */
+static bool
+exact_store_load_dependency (rtx store, rtx load)
+{
+ rtx set1, set2;
+
+ set1 = single_set (store);
+ if (!set1)
+ return false;
+ if (!MEM_P (SET_DEST (set1)))
+ return false;
+ set2 = single_set (load);
+ if (!set2)
+ return false;
+ if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
+ return true;
+ return false;
+}
+
static int
ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
{
@@ -24438,6 +24613,39 @@ ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
else
cost = 0;
}
+ break;
+
+ case PROCESSOR_SLM:
+ if (!reload_completed)
+ return cost;
+
+ /* Increase cost of integer loads. */
+ memory = get_attr_memory (dep_insn);
+ if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ {
+ enum attr_unit unit = get_attr_unit (dep_insn);
+ if (unit == UNIT_INTEGER && cost == 1)
+ {
+ if (memory == MEMORY_LOAD)
+ cost = 3;
+ else
+ {
+ /* Increase cost of ld/st for short int types only
+ because of store forwarding issue. */
+ rtx set = single_set (dep_insn);
+ if (set && (GET_MODE (SET_DEST (set)) == QImode
+ || GET_MODE (SET_DEST (set)) == HImode))
+ {
+ /* Increase cost of the store/load insn if an exact
+ dependence exists and it is a load insn. */
+ enum attr_memory insn_memory = get_attr_memory (insn);
+ if (insn_memory == MEMORY_LOAD
+ && exact_store_load_dependency (dep_insn, insn))
+ cost = 3;
+ }
+ }
+ }
+ }
default:
break;
@@ -24466,6 +24674,7 @@ ia32_multipass_dfa_lookahead (void)
case PROCESSOR_COREI7:
case PROCESSOR_HASWELL:
case PROCESSOR_ATOM:
+ case PROCESSOR_SLM:
/* Generally, we want haifa-sched:max_issue() to look ahead as far
as many instructions can be executed on a cycle, i.e.,
issue_rate. I wonder why tuning for many CPUs does not do this. */
@@ -24483,110 +24692,204 @@ ia32_multipass_dfa_lookahead (void)
execution. It is applied if
(1) IMUL instruction is on the top of list;
(2) There exists the only producer of independent IMUL instruction in
- ready list;
- (3) Put found producer on the top of ready list.
- Returns issue rate. */
-
+ ready list.
+ Return index of IMUL producer if it was found and -1 otherwise. */
static int
-ix86_sched_reorder(FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
- int clock_var ATTRIBUTE_UNUSED)
+do_reorder_for_imul (rtx *ready, int n_ready)
{
- static int issue_rate = -1;
- int n_ready = *pn_ready;
- rtx insn, insn1, insn2;
- int i;
+ rtx insn, set, insn1, insn2;
sd_iterator_def sd_it;
dep_t dep;
int index = -1;
+ int i;
- /* Set up issue rate. */
- issue_rate = ix86_issue_rate();
-
- /* Do reodering for Atom only. */
if (ix86_tune != PROCESSOR_ATOM)
- return issue_rate;
- /* Do not perform ready list reodering for pre-reload schedule pass. */
- if (!reload_completed)
- return issue_rate;
- /* Nothing to do if ready list contains only 1 instruction. */
- if (n_ready <= 1)
- return issue_rate;
+ return index;
/* Check that IMUL instruction is on the top of ready list. */
insn = ready[n_ready - 1];
- if (!NONDEBUG_INSN_P (insn))
- return issue_rate;
- insn = PATTERN (insn);
- if (GET_CODE (insn) == PARALLEL)
- insn = XVECEXP (insn, 0, 0);
- if (GET_CODE (insn) != SET)
- return issue_rate;
- if (!(GET_CODE (SET_SRC (insn)) == MULT
- && GET_MODE (SET_SRC (insn)) == SImode))
- return issue_rate;
+ set = single_set (insn);
+ if (!set)
+ return index;
+ if (!(GET_CODE (SET_SRC (set)) == MULT
+ && GET_MODE (SET_SRC (set)) == SImode))
+ return index;
/* Search for producer of independent IMUL instruction. */
- for (i = n_ready - 2; i>= 0; i--)
+ for (i = n_ready - 2; i >= 0; i--)
{
insn = ready[i];
if (!NONDEBUG_INSN_P (insn))
- continue;
+ continue;
/* Skip IMUL instruction. */
insn2 = PATTERN (insn);
if (GET_CODE (insn2) == PARALLEL)
- insn2 = XVECEXP (insn2, 0, 0);
+ insn2 = XVECEXP (insn2, 0, 0);
if (GET_CODE (insn2) == SET
- && GET_CODE (SET_SRC (insn2)) == MULT
- && GET_MODE (SET_SRC (insn2)) == SImode)
- continue;
+ && GET_CODE (SET_SRC (insn2)) == MULT
+ && GET_MODE (SET_SRC (insn2)) == SImode)
+ continue;
FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
- {
- rtx con;
+ {
+ rtx con;
con = DEP_CON (dep);
if (!NONDEBUG_INSN_P (con))
continue;
- insn1 = PATTERN (con);
- if (GET_CODE (insn1) == PARALLEL)
- insn1 = XVECEXP (insn1, 0, 0);
-
- if (GET_CODE (insn1) == SET
- && GET_CODE (SET_SRC (insn1)) == MULT
- && GET_MODE (SET_SRC (insn1)) == SImode)
- {
- sd_iterator_def sd_it1;
- dep_t dep1;
- /* Check if there is no other dependee for IMUL. */
- index = i;
- FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
- {
- rtx pro;
- pro = DEP_PRO (dep1);
+ insn1 = PATTERN (con);
+ if (GET_CODE (insn1) == PARALLEL)
+ insn1 = XVECEXP (insn1, 0, 0);
+
+ if (GET_CODE (insn1) == SET
+ && GET_CODE (SET_SRC (insn1)) == MULT
+ && GET_MODE (SET_SRC (insn1)) == SImode)
+ {
+ sd_iterator_def sd_it1;
+ dep_t dep1;
+ /* Check if there is no other dependee for IMUL. */
+ index = i;
+ FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep1);
if (!NONDEBUG_INSN_P (pro))
continue;
- if (pro != insn)
- index = -1;
- }
- if (index >= 0)
- break;
- }
- }
+ if (pro != insn)
+ index = -1;
+ }
+ if (index >= 0)
+ break;
+ }
+ }
if (index >= 0)
- break;
+ break;
}
- if (index < 0)
- return issue_rate; /* Didn't find IMUL producer. */
+ return index;
+}
+
+/* Try to find the best candidate on the top of ready list if two insns
+ have the same priority - candidate is best if its dependees were
+ scheduled earlier. Applied for Silvermont only.
+ Return true if top 2 insns must be interchanged. */
+static bool
+swap_top_of_ready_list (rtx *ready, int n_ready)
+{
+ rtx top = ready[n_ready - 1];
+ rtx next = ready[n_ready - 2];
+ rtx set;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int clock1 = -1;
+ int clock2 = -1;
+ #define INSN_TICK(INSN) (HID (INSN)->tick)
- if (sched_verbose > 1)
- fprintf(dump, ";;\tatom sched_reorder: swap %d and %d insns\n",
- INSN_UID (ready[index]), INSN_UID (ready[n_ready - 1]));
+ if (ix86_tune != PROCESSOR_SLM)
+ return false;
- /* Put IMUL producer (ready[index]) at the top of ready list. */
- insn1= ready[index];
- for (i = index; i < n_ready - 1; i++)
- ready[i] = ready[i + 1];
- ready[n_ready - 1] = insn1;
+ if (!NONDEBUG_INSN_P (top))
+ return false;
+ if (!NONJUMP_INSN_P (top))
+ return false;
+ if (!NONDEBUG_INSN_P (next))
+ return false;
+ if (!NONJUMP_INSN_P (next))
+ return false;
+ set = single_set (top);
+ if (!set)
+ return false;
+ set = single_set (next);
+ if (!set)
+ return false;
+ if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
+ {
+ if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
+ return false;
+ /* Determine the winner more precisely. */
+ FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock1)
+ clock1 = INSN_TICK (pro);
+ }
+ FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock2)
+ clock2 = INSN_TICK (pro);
+ }
+
+ if (clock1 == clock2)
+ {
+ /* Determine winner - load must win. */
+ enum attr_memory memory1, memory2;
+ memory1 = get_attr_memory (top);
+ memory2 = get_attr_memory (next);
+ if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
+ return true;
+ }
+ return (bool) (clock2 < clock1);
+ }
+ return false;
+ #undef INSN_TICK
+}
+
+/* Perform possible reordering of the ready list for Atom/Silvermont only.
+ Return issue rate. */
+static int
+ix86_sched_reorder (FILE *dump, int sched_verbose, rtx *ready, int *pn_ready,
+ int clock_var)
+{
+ int issue_rate = -1;
+ int n_ready = *pn_ready;
+ int i;
+ rtx insn;
+ int index = -1;
+
+ /* Set up issue rate. */
+ issue_rate = ix86_issue_rate ();
+
+  /* Do reordering for Atom/SLM only.  */
+ if (ix86_tune != PROCESSOR_ATOM && ix86_tune != PROCESSOR_SLM)
+ return issue_rate;
+
+ /* Nothing to do if ready list contains only 1 instruction. */
+ if (n_ready <= 1)
+ return issue_rate;
+
+  /* Do reordering for the post-reload scheduler only.  */
+ if (!reload_completed)
+ return issue_rate;
+
+ if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
+ INSN_UID (ready[index]));
+
+ /* Put IMUL producer (ready[index]) at the top of ready list. */
+ insn = ready[index];
+ for (i = index; i < n_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[n_ready - 1] = insn;
+ return issue_rate;
+ }
+ if (clock_var != 0 && swap_top_of_ready_list (ready, n_ready))
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
+ INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
+ /* Swap 2 top elements of ready list. */
+ insn = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = insn;
+ }
return issue_rate;
}
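
The ready list manipulated above is an array whose last element (ready[n_ready - 1]) is the insn the scheduler issues next, so "put on top" means rotating the chosen element to the end of the array, and the Silvermont swap exchanges the last two slots.  A minimal standalone sketch of those two moves, with insns reduced to plain ints for illustration (not GCC code):

    static void
    put_on_top (int *ready, int n_ready, int index)
    {
      int insn = ready[index];
      int i;
      /* Shift the tail down by one slot...  */
      for (i = index; i < n_ready - 1; i++)
        ready[i] = ready[i + 1];
      /* ...and make the chosen insn the new top.  */
      ready[n_ready - 1] = insn;
    }

    static void
    swap_top_two (int *ready, int n_ready)
    {
      int tmp = ready[n_ready - 1];
      ready[n_ready - 1] = ready[n_ready - 2];
      ready[n_ready - 2] = tmp;
    }
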
@@ -25079,11 +25382,12 @@ ix86_constant_alignment (tree exp, int align)
instead of that alignment to align the object. */
int
-ix86_data_alignment (tree type, int align)
+ix86_data_alignment (tree type, int align, bool opt)
{
int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
- if (AGGREGATE_TYPE_P (type)
+ if (opt
+ && AGGREGATE_TYPE_P (type)
&& TYPE_SIZE (type)
&& TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
&& (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
@@ -25095,14 +25399,17 @@ ix86_data_alignment (tree type, int align)
to 16byte boundary. */
if (TARGET_64BIT)
{
- if (AGGREGATE_TYPE_P (type)
- && TYPE_SIZE (type)
- && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
- && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
- || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
+ if ((opt ? AGGREGATE_TYPE_P (type) : TREE_CODE (type) == ARRAY_TYPE)
+ && TYPE_SIZE (type)
+ && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
+ && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
+ || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
return 128;
}
+ if (!opt)
+ return align;
+
if (TREE_CODE (type) == ARRAY_TYPE)
{
if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
@@ -25614,8 +25921,6 @@ enum ix86_builtins
IX86_BUILTIN_CMPNEQSS,
IX86_BUILTIN_CMPNLTSS,
IX86_BUILTIN_CMPNLESS,
- IX86_BUILTIN_CMPNGTSS,
- IX86_BUILTIN_CMPNGESS,
IX86_BUILTIN_CMPORDSS,
IX86_BUILTIN_CMPUNORDSS,
@@ -27252,8 +27557,6 @@ static const struct builtin_description bdesc_args[] =
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
- { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, (int) V4SF_FTYPE_V4SF_V4SF_SWAP },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, (int) V4SF_FTYPE_V4SF_V4SF },
{ OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF },
@@ -29468,7 +29771,7 @@ ix86_get_function_versions_dispatcher (void *decl)
dispatcher_version_info
= insert_new_cgraph_node_version (dispatcher_node);
dispatcher_version_info->next = default_version_info;
- dispatcher_node->local.finalized = 1;
+ dispatcher_node->symbol.definition = 1;
/* Set the dispatcher for all the versions. */
it_v = default_version_info;
@@ -29623,7 +29926,7 @@ ix86_generate_version_dispatcher_body (void *node_p)
default_ver_decl = node_version_info->next->this_node->symbol.decl;
/* node is going to be an alias, so remove the finalized bit. */
- node->local.finalized = false;
+ node->symbol.definition = false;
resolver_decl = make_resolver_func (default_ver_decl,
node->symbol.decl, &empty_bb);
@@ -29756,6 +30059,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
M_INTEL_COREI7,
M_AMDFAM10H,
M_AMDFAM15H,
+ M_INTEL_SLM,
M_CPU_SUBTYPE_START,
M_INTEL_COREI7_NEHALEM,
M_INTEL_COREI7_WESTMERE,
@@ -29778,6 +30082,7 @@ fold_builtin_cpu (tree fndecl, tree *args)
{"amd", M_AMD},
{"intel", M_INTEL},
{"atom", M_INTEL_ATOM},
+ {"slm", M_INTEL_SLM},
{"core2", M_INTEL_CORE2},
{"corei7", M_INTEL_COREI7},
{"nehalem", M_INTEL_COREI7_NEHALEM},
@@ -29817,6 +30122,9 @@ fold_builtin_cpu (tree fndecl, tree *args)
tree __cpu_model_var = make_var_decl (__processor_model_type,
"__cpu_model");
+
+ varpool_add_new_variable (__cpu_model_var);
+
gcc_assert ((args != NULL) && (*args != NULL));
param_string_cst = *args;
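
With the "slm" entry added to the table above, the run-time CPU detection builtins recognize Silvermont.  A hedged usage sketch (assumes a compiler that contains this change):

    /* Pick a Silvermont-specific code path at run time.  */
    int
    use_slm_path (void)
    {
      __builtin_cpu_init ();
      return __builtin_cpu_is ("slm");
    }
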
@@ -33650,6 +33958,8 @@ static inline bool
inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
enum machine_mode mode, int strict)
{
+ if (lra_in_progress && (class1 == NO_REGS || class2 == NO_REGS))
+ return false;
if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
|| MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
|| MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
@@ -35564,6 +35874,46 @@ ix86_pad_short_function (void)
}
}
+/* Fix up a Windows system unwinder issue. If an EH region falls thru into
+ the epilogue, the Windows system unwinder will apply epilogue logic and
+ produce incorrect offsets. This can be avoided by adding a nop between
+ the last insn that can throw and the first insn of the epilogue. */
+
+static void
+ix86_seh_fixup_eh_fallthru (void)
+{
+ edge e;
+ edge_iterator ei;
+
+ FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
+ {
+ rtx insn, next;
+
+ /* Find the beginning of the epilogue. */
+ for (insn = BB_END (e->src); insn != NULL; insn = PREV_INSN (insn))
+ if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_EPILOGUE_BEG)
+ break;
+ if (insn == NULL)
+ continue;
+
+      /* We only care about preceding insns that can throw.  */
+ insn = prev_active_insn (insn);
+ if (insn == NULL || !can_throw_internal (insn))
+ continue;
+
+ /* Do not separate calls from their debug information. */
+ for (next = NEXT_INSN (insn); next != NULL; next = NEXT_INSN (next))
+ if (NOTE_P (next)
+ && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
+ || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION))
+ insn = next;
+ else
+ break;
+
+ emit_insn_after (gen_nops (const1_rtx), insn);
+ }
+}
+
/* Implement machine specific optimizations. We implement padding of returns
for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
static void
@@ -35573,6 +35923,9 @@ ix86_reorg (void)
with old MDEP_REORGS that are not CFG based. Recompute it now. */
compute_bb_for_insn ();
+ if (TARGET_SEH && current_function_has_exception_handlers ())
+ ix86_seh_fixup_eh_fallthru ();
+
if (optimize && optimize_function_for_speed_p (cfun))
{
if (TARGET_PAD_SHORT_FUNCTION)
@@ -42682,6 +43035,9 @@ ix86_memmodel_check (unsigned HOST_WIDE_INT val)
#undef TARGET_REGISTER_PRIORITY
#define TARGET_REGISTER_PRIORITY ix86_register_priority
+#undef TARGET_REGISTER_USAGE_LEVELING_P
+#define TARGET_REGISTER_USAGE_LEVELING_P hook_bool_void_true
+
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P ix86_legitimate_constant_p
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index 6055b99a55b..7d940f98804 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -257,6 +257,7 @@ extern const struct processor_costs ix86_size_cost;
#define TARGET_BTVER1 (ix86_tune == PROCESSOR_BTVER1)
#define TARGET_BTVER2 (ix86_tune == PROCESSOR_BTVER2)
#define TARGET_ATOM (ix86_tune == PROCESSOR_ATOM)
+#define TARGET_SLM (ix86_tune == PROCESSOR_SLM)
/* Feature tests against the various tunings. */
enum ix86_tune_indices {
@@ -332,6 +333,7 @@ enum ix86_tune_indices {
X86_TUNE_REASSOC_FP_TO_PARALLEL,
X86_TUNE_GENERAL_REGS_SSE_SPILL,
X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE,
+ X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS,
X86_TUNE_LAST
};
@@ -442,6 +444,8 @@ extern unsigned char ix86_tune_features[X86_TUNE_LAST];
ix86_tune_features[X86_TUNE_GENERAL_REGS_SSE_SPILL]
#define TARGET_AVOID_MEM_OPND_FOR_CMOVE \
ix86_tune_features[X86_TUNE_AVOID_MEM_OPND_FOR_CMOVE]
+#define TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS \
+ ix86_tune_features[X86_TUNE_SPLIT_MEM_OPND_FOR_FP_CONVERTS]
/* Feature tests against the various architecture variations. */
enum ix86_arch_indices {
@@ -623,6 +627,7 @@ enum target_cpu_default
TARGET_CPU_DEFAULT_corei7,
TARGET_CPU_DEFAULT_haswell,
TARGET_CPU_DEFAULT_atom,
+ TARGET_CPU_DEFAULT_slm,
TARGET_CPU_DEFAULT_geode,
TARGET_CPU_DEFAULT_k6,
@@ -854,7 +859,18 @@ enum target_cpu_default
cause character arrays to be word-aligned so that `strcpy' calls
that copy constants to character arrays can be done inline. */
-#define DATA_ALIGNMENT(TYPE, ALIGN) ix86_data_alignment ((TYPE), (ALIGN))
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ ix86_data_alignment ((TYPE), (ALIGN), true)
+
+/* Similar to DATA_ALIGNMENT, but for the cases where the ABI mandates
+   some alignment increase rather than for optimization purposes only.  E.g. the
+ AMD x86-64 psABI says that variables with array type larger than 15 bytes
+ must be aligned to 16 byte boundaries.
+
+ If this macro is not defined, then ALIGN is used. */
+
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
+ ix86_data_alignment ((TYPE), (ALIGN), false)
/* If defined, a C expression to compute the alignment for a local
variable. TYPE is the data type, and ALIGN is the alignment that
@@ -2131,6 +2147,7 @@ enum processor_type
PROCESSOR_BTVER1,
PROCESSOR_BTVER2,
PROCESSOR_ATOM,
+ PROCESSOR_SLM,
PROCESSOR_max
};
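
The DATA_ALIGNMENT and DATA_ABI_ALIGNMENT macros defined earlier in this header both funnel into ix86_data_alignment and differ only in the new opt flag: the former may raise alignment purely as an optimization, the latter reports only what the psABI mandates.  A rough sketch of that split, with sizes in bits and the 256-bit optimization cap assumed for illustration (the real function also checks TYPE_SIZE and aggregate-ness):

    static int
    sketch_data_alignment (unsigned long long size, int align, int abi_only)
    {
      /* x86-64 psABI: arrays of 16 bytes or more get 16-byte alignment.  */
      if (size >= 128 && align < 128)
        align = 128;
      if (abi_only)
        return align;          /* DATA_ABI_ALIGNMENT stops here.  */
      /* DATA_ALIGNMENT may raise further, purely as an optimization.  */
      if (size >= 256 && align < 256)
        align = 256;
      return align;
    }
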
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index ce77f15f009..a6e2946584b 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -323,7 +323,7 @@
;; Processor type.
(define_attr "cpu" "none,pentium,pentiumpro,geode,k6,athlon,k8,core2,corei7,
- atom,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
+ atom,slm,generic64,amdfam10,bdver1,bdver2,bdver3,btver1,btver2"
(const (symbol_ref "ix86_schedule")))
;; A basic instruction type. Refinements due to arguments to be
@@ -964,6 +964,7 @@
(include "btver2.md")
(include "geode.md")
(include "atom.md")
+(include "slm.md")
(include "core2.md")
@@ -3624,6 +3625,18 @@
CONST0_RTX (V4SFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:DF 0 "register_operand")
+ (float_extend:DF
+ (match_operand:SF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_extend:DF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (SFmode, REGNO (operands[0]));")
+
(define_insn "*extendsfdf2_mixed"
[(set (match_operand:DF 0 "nonimmediate_operand" "=f,m,x")
(float_extend:DF
@@ -3765,6 +3778,18 @@
CONST0_RTX (V2DFmode), operands[1]));
})
+;; It's more profitable to split and then extend in the same register.
+(define_peephole2
+ [(set (match_operand:SF 0 "register_operand")
+ (float_truncate:SF
+ (match_operand:DF 1 "memory_operand")))]
+ "TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS
+ && optimize_insn_for_speed_p ()
+ && SSE_REG_P (operands[0])"
+ [(set (match_dup 2) (match_dup 1))
+ (set (match_dup 0) (float_truncate:SF (match_dup 2)))]
+ "operands[2] = gen_rtx_REG (DFmode, REGNO (operands[0]));")
+
(define_expand "truncdfsf2_with_temp"
[(parallel [(set (match_operand:SF 0)
(float_truncate:SF (match_operand:DF 1)))
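
The two peepholes added above split a float/double conversion with a memory source into a plain load followed by a register-to-register convert when TARGET_SPLIT_MEM_OPND_FOR_FP_CONVERTS is set (Silvermont tuning).  In source terms, the conversions they target are simply:

    /* The memory-operand conversions the new peepholes rewrite; on SLM the
       split form (separate load, then convert in-register) is assumed to be
       the faster encoding.  */
    double widen  (const float  *p) { return (double) *p; }
    float  narrow (const double *p) { return (float)  *p; }
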
@@ -11654,8 +11679,8 @@
(define_insn "bmi_bextr_<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r,r")
- (unspec:SWI48 [(match_operand:SWI48 1 "register_operand" "r,r")
- (match_operand:SWI48 2 "nonimmediate_operand" "r,m")]
+ (unspec:SWI48 [(match_operand:SWI48 1 "nonimmediate_operand" "r,m")
+ (match_operand:SWI48 2 "register_operand" "r,r")]
UNSPEC_BEXTR))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI"
@@ -11708,9 +11733,9 @@
;; BMI2 instructions.
(define_insn "bmi2_bzhi_<mode>3"
[(set (match_operand:SWI48 0 "register_operand" "=r")
- (and:SWI48 (match_operand:SWI48 1 "register_operand" "r")
- (lshiftrt:SWI48 (const_int -1)
- (match_operand:SWI48 2 "nonimmediate_operand" "rm"))))
+ (and:SWI48 (lshiftrt:SWI48 (const_int -1)
+ (match_operand:SWI48 2 "register_operand" "r"))
+ (match_operand:SWI48 1 "nonimmediate_operand" "rm")))
(clobber (reg:CC FLAGS_REG))]
"TARGET_BMI2"
"bzhi\t{%2, %1, %0|%0, %1, %2}"
@@ -16566,6 +16591,7 @@
"(TARGET_READ_MODIFY_WRITE || optimize_insn_for_size_p ())
&& peep2_reg_dead_p (4, operands[0])
&& !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
&& (<MODE>mode != QImode
|| immediate_operand (operands[2], QImode)
|| q_regs_operand (operands[2], QImode))
@@ -16630,6 +16656,7 @@
|| immediate_operand (operands[2], SImode)
|| q_regs_operand (operands[2], SImode))
&& !reg_overlap_mentioned_p (operands[0], operands[1])
+ && !reg_overlap_mentioned_p (operands[0], operands[2])
&& ix86_match_ccmode (peep2_next_insn (3),
(GET_CODE (operands[3]) == PLUS
|| GET_CODE (operands[3]) == MINUS)
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 131af0be2b7..b26dc46d256 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -49,7 +49,12 @@ __bswapd (int __X)
return __builtin_bswap32 (__X);
}
-#ifdef __SSE4_2__
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
/* 32bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -71,7 +76,11 @@ __crc32d (unsigned int __C, unsigned int __V)
{
return __builtin_ia32_crc32si (__C, __V);
}
-#endif /* SSE4.2 */
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
/* 32bit popcnt */
extern __inline int
@@ -186,7 +195,12 @@ __bswapq (long long __X)
return __builtin_bswap64 (__X);
}
-#ifdef __SSE4_2__
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
+
/* 64bit accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -194,7 +208,11 @@ __crc32q (unsigned long long __C, unsigned long long __V)
{
return __builtin_ia32_crc32di (__C, __V);
}
-#endif
+
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
/* 64bit popcnt */
extern __inline long long
diff --git a/gcc/config/i386/immintrin.h b/gcc/config/i386/immintrin.h
index b137753a4f5..e825c34a256 100644
--- a/gcc/config/i386/immintrin.h
+++ b/gcc/config/i386/immintrin.h
@@ -24,71 +24,43 @@
#ifndef _IMMINTRIN_H_INCLUDED
#define _IMMINTRIN_H_INCLUDED
-#ifdef __MMX__
#include <mmintrin.h>
-#endif
-#ifdef __SSE__
#include <xmmintrin.h>
-#endif
-#ifdef __SSE2__
#include <emmintrin.h>
-#endif
-#ifdef __SSE3__
#include <pmmintrin.h>
-#endif
-#ifdef __SSSE3__
#include <tmmintrin.h>
-#endif
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
#include <smmintrin.h>
-#endif
-#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
-#endif
-#ifdef __AVX__
#include <avxintrin.h>
-#endif
-#ifdef __AVX2__
#include <avx2intrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __BMI__
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __FMA__
#include <fmaintrin.h>
-#endif
-#ifdef __F16C__
#include <f16cintrin.h>
-#endif
-#ifdef __RTM__
#include <rtmintrin.h>
-#endif
-#ifdef __RTM__
#include <xtestintrin.h>
-#endif
-#ifdef __RDRND__
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdrand16_step (unsigned short *__P)
@@ -102,10 +74,18 @@ _rdrand32_step (unsigned int *__P)
{
return __builtin_ia32_rdrand32_step (__P);
}
-#endif /* __RDRND__ */
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
#ifdef __x86_64__
-#ifdef __FSGSBASE__
+
+#ifndef __FSGSBASE__
+#pragma GCC push_options
+#pragma GCC target("fsgsbase")
+#define __DISABLE_FSGSBASE__
+#endif /* __FSGSBASE__ */
extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_readfsbase_u32 (void)
@@ -161,16 +141,27 @@ _writegsbase_u64 (unsigned long long __B)
{
__builtin_ia32_wrgsbase64 (__B);
}
-#endif /* __FSGSBASE__ */
-
-#ifdef __RDRND__
+#ifdef __DISABLE_FSGSBASE__
+#undef __DISABLE_FSGSBASE__
+#pragma GCC pop_options
+#endif /* __DISABLE_FSGSBASE__ */
+
+#ifndef __RDRND__
+#pragma GCC push_options
+#pragma GCC target("rdrnd")
+#define __DISABLE_RDRND__
+#endif /* __RDRND__ */
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_rdrand64_step (unsigned long long *__P)
{
return __builtin_ia32_rdrand64_step (__P);
}
-#endif /* __RDRND__ */
+#ifdef __DISABLE_RDRND__
+#undef __DISABLE_RDRND__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDRND__ */
+
#endif /* __x86_64__ */
#endif /* _IMMINTRIN_H_INCLUDED */
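
This header rewrite, like the ones in the surrounding intrinsic headers, replaces the hard "#error ... not enabled" guards with a push_options/target/pop_options bracket, so an intrinsic can be used from a function that enables the ISA extension locally instead of requiring the matching -m flag for the whole translation unit.  A hedged usage sketch (assumes a compiler shipping these pragma-based headers; no -mrdrnd on the command line):

    #include <immintrin.h>

    __attribute__ ((target ("rdrnd")))
    int
    get_random (unsigned int *value)
    {
      /* Returns 1 on success, 0 if no random value was available.  */
      return _rdrand32_step (value);
    }
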
diff --git a/gcc/config/i386/lwpintrin.h b/gcc/config/i386/lwpintrin.h
index 8c708508a80..64ba7321fd9 100644
--- a/gcc/config/i386/lwpintrin.h
+++ b/gcc/config/i386/lwpintrin.h
@@ -29,8 +29,10 @@
#define _LWPINTRIN_H_INCLUDED
#ifndef __LWP__
-# error "LWP instruction set not enabled"
-#else
+#pragma GCC push_options
+#pragma GCC target("lwp")
+#define __DISABLE_LWP__
+#endif /* __LWP__ */
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__llwpcb (void *pcbAddress)
@@ -95,6 +97,9 @@ __lwpins64 (unsigned long long data2, unsigned int data1, unsigned int flags)
#endif
#endif
-#endif /* __LWP__ */
+#ifdef __DISABLE_LWP__
+#undef __DISABLE_LWP__
+#pragma GCC pop_options
+#endif /* __DISABLE_LWP__ */
#endif /* _LWPINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/lzcntintrin.h b/gcc/config/i386/lzcntintrin.h
index 9382bb96ecc..22b9ee7999e 100644
--- a/gcc/config/i386/lzcntintrin.h
+++ b/gcc/config/i386/lzcntintrin.h
@@ -25,13 +25,16 @@
# error "Never use <lzcntintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __LZCNT__
-# error "LZCNT instruction is not enabled"
-#endif /* __LZCNT__ */
#ifndef _LZCNTINTRIN_H_INCLUDED
#define _LZCNTINTRIN_H_INCLUDED
+#ifndef __LZCNT__
+#pragma GCC push_options
+#pragma GCC target("lzcnt")
+#define __DISABLE_LZCNT__
+#endif /* __LZCNT__ */
+
extern __inline unsigned short __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__lzcnt16 (unsigned short __X)
{
@@ -64,4 +67,9 @@ _lzcnt_u64 (unsigned long long __X)
}
#endif
+#ifdef __DISABLE_LZCNT__
+#undef __DISABLE_LZCNT__
+#pragma GCC pop_options
+#endif /* __DISABLE_LZCNT__ */
+
#endif /* _LZCNTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/mm3dnow.h b/gcc/config/i386/mm3dnow.h
index 7e806b701a1..093d5e77932 100644
--- a/gcc/config/i386/mm3dnow.h
+++ b/gcc/config/i386/mm3dnow.h
@@ -27,11 +27,15 @@
#ifndef _MM3DNOW_H_INCLUDED
#define _MM3DNOW_H_INCLUDED
-#ifdef __3dNOW__
-
#include <mmintrin.h>
#include <prfchwintrin.h>
+#ifndef __3dNOW__
+#pragma GCC push_options
+#pragma GCC target("3dnow")
+#define __DISABLE_3dNOW__
+#endif /* __3dNOW__ */
+
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_femms (void)
{
@@ -205,6 +209,10 @@ _m_pswapd (__m64 __A)
}
#endif /* __3dNOW_A__ */
-#endif /* __3dNOW__ */
+
+#ifdef __DISABLE_3dNOW__
+#undef __DISABLE_3dNOW__
+#pragma GCC pop_options
+#endif /* __DISABLE_3dNOW__ */
#endif /* _MM3DNOW_H_INCLUDED */
diff --git a/gcc/config/i386/mmintrin.h b/gcc/config/i386/mmintrin.h
index c76203b5477..c0729709373 100644
--- a/gcc/config/i386/mmintrin.h
+++ b/gcc/config/i386/mmintrin.h
@@ -28,8 +28,11 @@
#define _MMINTRIN_H_INCLUDED
#ifndef __MMX__
-# error "MMX instruction set not enabled"
-#else
+#pragma GCC push_options
+#pragma GCC target("mmx")
+#define __DISABLE_MMX__
+#endif /* __MMX__ */
+
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef int __m64 __attribute__ ((__vector_size__ (8), __may_alias__));
@@ -303,13 +306,21 @@ _m_paddd (__m64 __m1, __m64 __m2)
}
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
-#ifdef __SSE2__
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_add_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_paddq ((__v1di)__m1, (__v1di)__m2);
}
-#endif
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
/* Add the 8-bit values in M1 to the 8-bit values in M2 using signed
saturated arithmetic. */
@@ -407,13 +418,21 @@ _m_psubd (__m64 __m1, __m64 __m2)
}
/* Add the 64-bit values in M1 to the 64-bit values in M2. */
-#ifdef __SSE2__
+#ifndef __SSE2__
+#pragma GCC push_options
+#pragma GCC target("sse2")
+#define __DISABLE_SSE2__
+#endif /* __SSE2__ */
+
extern __inline __m64 __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_sub_si64 (__m64 __m1, __m64 __m2)
{
return (__m64) __builtin_ia32_psubq ((__v1di)__m1, (__v1di)__m2);
}
-#endif
+#ifdef __DISABLE_SSE2__
+#undef __DISABLE_SSE2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE2__ */
/* Subtract the 8-bit values in M2 from the 8-bit values in M1 using signed
saturating arithmetic. */
@@ -915,6 +934,9 @@ _mm_set1_pi8 (char __b)
{
return _mm_set_pi8 (__b, __b, __b, __b, __b, __b, __b, __b);
}
+#ifdef __DISABLE_MMX__
+#undef __DISABLE_MMX__
+#pragma GCC pop_options
+#endif /* __DISABLE_MMX__ */
-#endif /* __MMX__ */
#endif /* _MMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/nmmintrin.h b/gcc/config/i386/nmmintrin.h
index a4fbed26268..aefe3ef9e90 100644
--- a/gcc/config/i386/nmmintrin.h
+++ b/gcc/config/i386/nmmintrin.h
@@ -27,11 +27,7 @@
#ifndef _NMMINTRIN_H_INCLUDED
#define _NMMINTRIN_H_INCLUDED
-#ifndef __SSE4_2__
-# error "SSE4.2 instruction set not enabled"
-#else
/* We just include SSE4.1 header file. */
#include <smmintrin.h>
-#endif /* __SSE4_2__ */
#endif /* _NMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/pmmintrin.h b/gcc/config/i386/pmmintrin.h
index 9c6956c1374..2447d5aa31b 100644
--- a/gcc/config/i386/pmmintrin.h
+++ b/gcc/config/i386/pmmintrin.h
@@ -27,13 +27,15 @@
#ifndef _PMMINTRIN_H_INCLUDED
#define _PMMINTRIN_H_INCLUDED
-#ifndef __SSE3__
-# error "SSE3 instruction set not enabled"
-#else
-
/* We need definitions from the SSE2 and SSE header files*/
#include <emmintrin.h>
+#ifndef __SSE3__
+#pragma GCC push_options
+#pragma GCC target("sse3")
+#define __DISABLE_SSE3__
+#endif /* __SSE3__ */
+
/* Additional bits in the MXCSR. */
#define _MM_DENORMALS_ZERO_MASK 0x0040
#define _MM_DENORMALS_ZERO_ON 0x0040
@@ -122,6 +124,9 @@ _mm_mwait (unsigned int __E, unsigned int __H)
__builtin_ia32_mwait (__E, __H);
}
-#endif /* __SSE3__ */
+#ifdef __DISABLE_SSE3__
+#undef __DISABLE_SSE3__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE3__ */
#endif /* _PMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/popcntintrin.h b/gcc/config/i386/popcntintrin.h
index af7efdf5d10..ee3a8e0d076 100644
--- a/gcc/config/i386/popcntintrin.h
+++ b/gcc/config/i386/popcntintrin.h
@@ -21,13 +21,15 @@
see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
<http://www.gnu.org/licenses/>. */
-#ifndef __POPCNT__
-# error "POPCNT instruction set not enabled"
-#endif /* __POPCNT__ */
-
#ifndef _POPCNTINTRIN_H_INCLUDED
#define _POPCNTINTRIN_H_INCLUDED
+#ifndef __POPCNT__
+#pragma GCC push_options
+#pragma GCC target("popcnt")
+#define __DISABLE_POPCNT__
+#endif /* __POPCNT__ */
+
/* Calculate a number of bits set to 1. */
extern __inline int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_popcnt_u32 (unsigned int __X)
@@ -43,4 +45,9 @@ _mm_popcnt_u64 (unsigned long long __X)
}
#endif
+#ifdef __DISABLE_POPCNT__
+#undef __DISABLE_POPCNT__
+#pragma GCC pop_options
+#endif /* __DISABLE_POPCNT__ */
+
#endif /* _POPCNTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/prfchwintrin.h b/gcc/config/i386/prfchwintrin.h
index b8011bb6bd1..73aa4cac7af 100644
--- a/gcc/config/i386/prfchwintrin.h
+++ b/gcc/config/i386/prfchwintrin.h
@@ -26,17 +26,24 @@
#endif
-#if !defined (__PRFCHW__) && !defined (__3dNOW__)
-# error "PRFCHW instruction not enabled"
-#endif /* __PRFCHW__ or __3dNOW__*/
-
#ifndef _PRFCHWINTRIN_H_INCLUDED
#define _PRFCHWINTRIN_H_INCLUDED
+#ifndef __PRFCHW__
+#pragma GCC push_options
+#pragma GCC target("prfchw")
+#define __DISABLE_PRFCHW__
+#endif /* __PRFCHW__ */
+
extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_m_prefetchw (void *__P)
{
__builtin_prefetch (__P, 1, 3 /* _MM_HINT_T0 */);
}
+#ifdef __DISABLE_PRFCHW__
+#undef __DISABLE_PRFCHW__
+#pragma GCC pop_options
+#endif /* __DISABLE_PRFCHW__ */
+
#endif /* _PRFCHWINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/rdseedintrin.h b/gcc/config/i386/rdseedintrin.h
index f30c237a6cb..3d040ab3af6 100644
--- a/gcc/config/i386/rdseedintrin.h
+++ b/gcc/config/i386/rdseedintrin.h
@@ -25,12 +25,15 @@
# error "Never use <rdseedintrin.h> directly; include <x86intrin.h> instead."
#endif
+#ifndef _RDSEEDINTRIN_H_INCLUDED
+#define _RDSEEDINTRIN_H_INCLUDED
+
#ifndef __RDSEED__
-# error "RDSEED instruction not enabled"
+#pragma GCC push_options
+#pragma GCC target("rdseed")
+#define __DISABLE_RDSEED__
#endif /* __RDSEED__ */
-#ifndef _RDSEEDINTRIN_H_INCLUDED
-#define _RDSEEDINTRIN_H_INCLUDED
extern __inline int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -55,4 +58,9 @@ _rdseed64_step (unsigned long long *p)
}
#endif
+#ifdef __DISABLE_RDSEED__
+#undef __DISABLE_RDSEED__
+#pragma GCC pop_options
+#endif /* __DISABLE_RDSEED__ */
+
#endif /* _RDSEEDINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/rtmintrin.h b/gcc/config/i386/rtmintrin.h
index 003a7718db3..eb2812fd82e 100644
--- a/gcc/config/i386/rtmintrin.h
+++ b/gcc/config/i386/rtmintrin.h
@@ -25,13 +25,15 @@
# error "Never use <rtmintrin.h> directly; include <immintrin.h> instead."
#endif
-#ifndef __RTM__
-# error "RTM instruction set not enabled"
-#endif /* __RTM__ */
-
#ifndef _RTMINTRIN_H_INCLUDED
#define _RTMINTRIN_H_INCLUDED
+#ifndef __RTM__
+#pragma GCC push_options
+#pragma GCC target("rtm")
+#define __DISABLE_RTM__
+#endif /* __RTM__ */
+
#define _XBEGIN_STARTED (~0u)
#define _XABORT_EXPLICIT (1 << 0)
#define _XABORT_RETRY (1 << 1)
@@ -74,4 +76,9 @@ _xabort (const unsigned int imm)
#define _xabort(N) __builtin_ia32_xabort (N)
#endif /* __OPTIMIZE__ */
+#ifdef __DISABLE_RTM__
+#undef __DISABLE_RTM__
+#pragma GCC pop_options
+#endif /* __DISABLE_RTM__ */
+
#endif /* _RTMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/slm.md b/gcc/config/i386/slm.md
new file mode 100644
index 00000000000..3ac919e372c
--- /dev/null
+++ b/gcc/config/i386/slm.md
@@ -0,0 +1,758 @@
+;; Silvermont (SLM) Scheduling
+;; Copyright (C) 2009, 2010 Free Software Foundation, Inc.
+;;
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify
+;; it under the terms of the GNU General Public License as published by
+;; the Free Software Foundation; either version 3, or (at your option)
+;; any later version.
+;;
+;; GCC is distributed in the hope that it will be useful,
+;; but WITHOUT ANY WARRANTY; without even the implied warranty of
+;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+;; GNU General Public License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+;;
+;; Silvermont has 2 out-of-order IEC, 2 in-order FEC and 1 in-order MEC.
+
+
+(define_automaton "slm")
+
+;; EU: Execution Unit
+;; Silvermont EUs are connected by port 0 or port 1.
+
+;; SLM has two ports: port 0 and port 1 connecting to all execution units
+(define_cpu_unit "slm-port-0,slm-port-1" "slm")
+
+(define_cpu_unit "slm-ieu-0, slm-ieu-1,
+ slm-imul, slm-feu-0, slm-feu-1"
+ "slm")
+
+(define_reservation "slm-all-ieu" "(slm-ieu-0 + slm-ieu-1 + slm-imul)")
+(define_reservation "slm-all-feu" "(slm-feu-0 + slm-feu-1)")
+(define_reservation "slm-all-eu" "(slm-all-ieu + slm-all-feu)")
+(define_reservation "slm-fp-0" "(slm-port-0 + slm-feu-0)")
+
+;; Some EUs have duplicated copies and can be accessed via either
+;; port 0 or port 1
+;; (define_reservation "slm-port-either" "(slm-port-0 | slm-port-1)"
+(define_reservation "slm-port-dual" "(slm-port-0 + slm-port-1)")
+
+;;; fmul insn can have 4 or 5 cycles latency
+(define_reservation "slm-fmul-5c"
+ "(slm-port-0 + slm-feu-0), slm-feu-0, nothing*3")
+(define_reservation "slm-fmul-4c" "(slm-port-0 + slm-feu-0), nothing*3")
+
+;;; fadd can have 3 cycles latency depending on instruction form
+(define_reservation "slm-fadd-3c" "(slm-port-1 + slm-feu-1), nothing*2")
+(define_reservation "slm-fadd-4c"
+ "(slm-port-1 + slm-feu-1), slm-feu-1, nothing*2")
+
+;;; imul insn has 3 cycles latency for SI operands
+(define_reservation "slm-imul-32"
+ "(slm-port-1 + slm-imul), nothing*2")
+(define_reservation "slm-imul-mem-32"
+ "(slm-port-1 + slm-imul + slm-port-0), nothing*2")
+;;; imul has 4 cycles latency for DI operands with 1/2 tput
+(define_reservation "slm-imul-64"
+ "(slm-port-1 + slm-imul), slm-imul, nothing*2")
+
+;;; dual-execution instructions can have 1,2,4,5 cycles latency depending on
+;;; instruction forms
+(define_reservation "slm-dual-1c" "(slm-port-dual + slm-all-eu)")
+(define_reservation "slm-dual-2c"
+ "(slm-port-dual + slm-all-eu, nothing)")
+
+;;; Most simple ALU instructions have 1 cycle latency.  Some of them
+;;; issue in port 0, some in port 1 and some in either port.
+(define_reservation "slm-simple-0" "(slm-port-0 + slm-ieu-0)")
+(define_reservation "slm-simple-1" "(slm-port-1 + slm-ieu-1)")
+(define_reservation "slm-simple-either" "(slm-simple-0 | slm-simple-1)")
+
+;;; Complex macro-instructions have varying latency and use both ports.
+(define_reservation "slm-complex" "(slm-port-dual + slm-all-eu)")
+
+(define_insn_reservation "slm_other" 9
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "!jeu")))
+ "slm-complex, slm-all-eu*8")
+
+;; return has type "other" with atom_unit "jeu"
+(define_insn_reservation "slm_other_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "other")
+ (eq_attr "atom_unit" "jeu")))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_multi" 9
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "multi"))
+ "slm-complex, slm-all-eu*8")
+
+;; Normal alu insns without carry
+(define_insn_reservation "slm_alu" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "0"))))
+ "slm-simple-either")
+
+;; Normal alu insns without carry, but use MEC.
+(define_insn_reservation "slm_alu_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "0"))))
+ "slm-simple-either")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "slm_alu_carry" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "none")
+ (eq_attr "use_carry" "1"))))
+ "slm-simple-either, nothing")
+
+;; Alu insn consuming CF, such as add/sbb
+(define_insn_reservation "slm_alu_carry_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu")
+ (and (eq_attr "memory" "!none")
+ (eq_attr "use_carry" "1"))))
+ "slm-simple-either, nothing")
+
+(define_insn_reservation "slm_alu1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none") (eq_attr "prefix_0f" "0")))
+ "slm-simple-either")
+
+;; bsf and bsr insns
+(define_insn_reservation "slm_alu1_1" 10
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "none") (eq_attr "prefix_0f" "1")))
+ "slm-simple-1, slm-ieu-1*9")
+
+(define_insn_reservation "slm_alu1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "alu1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_negnot" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_negnot_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "negnot")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_imov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_imov_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+;; 16<-16, 32<-32
+(define_insn_reservation "slm_imovx" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "slm-simple-either")
+
+;; 16<-16, 32<-32, mem
+(define_insn_reservation "slm_imovx_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (and (match_operand:HI 0 "register_operand")
+ (match_operand:HI 1 "general_operand"))
+ (and (match_operand:SI 0 "register_operand")
+ (match_operand:SI 1 "general_operand"))))))
+ "slm-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8
+(define_insn_reservation "slm_imovx_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "slm-simple-either")
+
+;; 32<-16, 32<-8, 64<-16, 64<-8, 64<-32, 8<-8, mem
+(define_insn_reservation "slm_imovx_2_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (eq_attr "memory" "!none")
+ (ior (match_operand:QI 0 "register_operand")
+ (ior (and (match_operand:SI 0 "register_operand")
+ (not (match_operand:SI 1 "general_operand")))
+ (match_operand:DI 0 "register_operand"))))))
+ "slm-simple-0")
+
+;; 16<-8
+(define_insn_reservation "slm_imovx_3" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imovx")
+ (and (match_operand:HI 0 "register_operand")
+ (match_operand:QI 1 "general_operand"))))
+ "slm-simple-0, nothing*2")
+
+(define_insn_reservation "slm_lea" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "!HI")))
+ "slm-simple-either")
+
+;; lea 16bit address is complex insn
+(define_insn_reservation "slm_lea_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "lea")
+ (eq_attr "mode" "HI")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_incdec" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_incdec_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "incdec")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0, nothing*2")
+
+;; simple shift instructions use the SHIFT eu, no memory operand
+(define_insn_reservation "slm_ishift" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "none") (eq_attr "prefix_0f" "0"))))
+ "slm-simple-0")
+
+;; simple shift instructions use the SHIFT eu, memory operand
+(define_insn_reservation "slm_ishift_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (and (eq_attr "memory" "!none") (eq_attr "prefix_0f" "0"))))
+ "slm-simple-0")
+
+;; DF shift (prefixed with 0f) is complex insn with latency of 4 cycles
+(define_insn_reservation "slm_ishift_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift")
+ (eq_attr "prefix_0f" "1")))
+ "slm-complex, slm-all-eu*3")
+
+(define_insn_reservation "slm_ishift1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_ishift1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ishift1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_rotate1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "rotate1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_imul" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "none") (eq_attr "mode" "SI"))))
+ "slm-imul-32")
+
+(define_insn_reservation "slm_imul_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (and (eq_attr "memory" "!none") (eq_attr "mode" "SI"))))
+ "slm-imul-mem-32")
+
+;; latency set to 4 as common 64x64 imul with 1/2 tput
+(define_insn_reservation "slm_imul_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "imul")
+ (eq_attr "mode" "!SI")))
+ "slm-imul-64")
+
+(define_insn_reservation "slm_idiv" 33
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "idiv"))
+ "slm-complex, slm-all-eu*16, nothing*16")
+
+(define_insn_reservation "slm_icmp" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_icmp_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmp")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_test" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_test_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "test")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_ibr" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "!load")))
+ "slm-simple-1")
+
+;; complex if jump target is from address
+(define_insn_reservation "slm_ibr_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ibr")
+ (eq_attr "memory" "load")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_setcc" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "!store")))
+ "slm-simple-either")
+
+;; 2 cycles complex if target is in memory
+(define_insn_reservation "slm_setcc_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "setcc")
+ (eq_attr "memory" "store")))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_icmov" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either, nothing")
+
+(define_insn_reservation "slm_icmov_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "icmov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0, nothing")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "slm_push" 2
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "push"))
+ "slm-dual-2c")
+
+;; pop r64 is 1 cycle. UCODE if segreg, ignored
+(define_insn_reservation "slm_pop" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "DI")))
+ "slm-dual-1c")
+
+;; pop non-r64 is 2 cycles. UCODE if segreg, ignored
+(define_insn_reservation "slm_pop_2" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "pop")
+ (eq_attr "mode" "!DI")))
+ "slm-dual-2c")
+
+;; UCODE if segreg, ignored
+(define_insn_reservation "slm_call" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "call"))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_callv" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "callv"))
+ "slm-dual-1c")
+
+(define_insn_reservation "slm_leave" 3
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "leave"))
+ "slm-complex, slm-all-eu*2")
+
+(define_insn_reservation "slm_str" 3
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "str"))
+ "slm-complex, slm-all-eu*2")
+
+(define_insn_reservation "slm_sselog" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_sselog_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_sselog1" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "none")))
+ "slm-simple-0")
+
+(define_insn_reservation "slm_sselog1_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sselog1")
+ (eq_attr "memory" "!none")))
+ "slm-simple-0")
+
+;; not pmad, not psad
+(define_insn_reservation "slm_sseiadd" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "!simul")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-simple-either")
+
+;; pmad, psad and 64
+(define_insn_reservation "slm_sseiadd_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "DI")))))
+ "slm-fmul-4c")
+
+;; pmad, psad and 128
+(define_insn_reservation "slm_sseiadd_3" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (and (not (match_operand:V2DI 0 "register_operand"))
+ (and (eq_attr "atom_unit" "simul" )
+ (eq_attr "mode" "TI")))))
+ "slm-fmul-5c")
+
+;; if paddq(64 bit op), phadd/phsub
+(define_insn_reservation "slm_sseiadd_4" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseiadd")
+ (ior (match_operand:V2DI 0 "register_operand")
+ (eq_attr "atom_unit" "complex"))))
+ "slm-fadd-4c")
+
+;; if immediate op.
+(define_insn_reservation "slm_sseishft" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "!sishuf")
+ (match_operand 2 "immediate_operand"))))
+ "slm-simple-either")
+
+;; if palignr or psrldq
+(define_insn_reservation "slm_sseishft_2" 1
+ (and (eq_attr "cpu" "slm")
+ (ior (eq_attr "type" "sseishft1")
+ (and (eq_attr "type" "sseishft")
+ (and (eq_attr "atom_unit" "sishuf")
+ (match_operand 2 "immediate_operand")))))
+ "slm-simple-0")
+
+;; if reg/mem op
+(define_insn_reservation "slm_sseishft_3" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseishft")
+ (not (match_operand 2 "immediate_operand"))))
+ "slm-complex, slm-all-eu")
+
+(define_insn_reservation "slm_sseimul" 5
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "sseimul"))
+ "slm-fmul-5c")
+
+;; rcpss or rsqrtss
+(define_insn_reservation "slm_sse" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (and (eq_attr "atom_sse_attr" "rcp") (eq_attr "mode" "SF"))))
+ "slm-fmul-4c")
+
+;; movshdup, movsldup. Suggest to type sseishft
+(define_insn_reservation "slm_sse_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "movdup")))
+ "slm-simple-0")
+
+;; lfence
+(define_insn_reservation "slm_sse_3" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (eq_attr "atom_sse_attr" "lfence")))
+ "slm-simple-either")
+
+;; sfence,clflush,mfence, prefetch
+(define_insn_reservation "slm_sse_4" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (ior (eq_attr "atom_sse_attr" "fence")
+ (eq_attr "atom_sse_attr" "prefetch"))))
+ "slm-simple-0")
+
+;; rcpps, rsqrtss, sqrt, ldmxcsr
+(define_insn_reservation "slm_sse_5" 9
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sse")
+ (ior (ior (eq_attr "atom_sse_attr" "sqrt")
+ (eq_attr "atom_sse_attr" "mxcsr"))
+ (and (eq_attr "atom_sse_attr" "rcp")
+ (eq_attr "mode" "V4SF")))))
+ "slm-complex, slm-all-eu*7, nothing")
+
+;; xmm->xmm
+(define_insn_reservation "slm_ssemov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy")
+ (match_operand 1 "register_operand" "xy"))))
+ "slm-simple-either")
+
+;; reg->xmm
+(define_insn_reservation "slm_ssemov_2" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "xy")
+ (match_operand 1 "register_operand" "r"))))
+ "slm-simple-0")
+
+;; xmm->reg
+(define_insn_reservation "slm_ssemov_3" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (match_operand 0 "register_operand" "r")
+ (match_operand 1 "register_operand" "xy"))))
+ "slm-simple-0, nothing*2")
+
+;; mov mem
+(define_insn_reservation "slm_ssemov_4" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (and (eq_attr "movu" "0") (eq_attr "memory" "!none"))))
+ "slm-simple-0")
+
+;; movu mem
+(define_insn_reservation "slm_ssemov_5" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemov")
+ (ior (eq_attr "movu" "1") (eq_attr "memory" "!none"))))
+ "slm-simple-0, nothing")
+
+;; no memory simple
+(define_insn_reservation "slm_sseadd" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-fadd-3c")
+
+;; memory simple
+(define_insn_reservation "slm_sseadd_mem" 3
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (and (eq_attr "memory" "!none")
+ (and (eq_attr "mode" "!V2DF")
+ (eq_attr "atom_unit" "!complex")))))
+ "slm-fadd-3c")
+
+;; maxps, minps, *pd, hadd, hsub
+(define_insn_reservation "slm_sseadd_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseadd")
+ (ior (eq_attr "mode" "V2DF") (eq_attr "atom_unit" "complex"))))
+ "slm-fadd-4c")
+
+;; Except dppd/dpps
+(define_insn_reservation "slm_ssemul" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "!SF")))
+ "slm-fmul-5c")
+
+;; Except dppd/dpps, 4 cycle if mulss
+(define_insn_reservation "slm_ssemul_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssemul")
+ (eq_attr "mode" "SF")))
+ "slm-fmul-4c")
+
+(define_insn_reservation "slm_ssecmp" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssecmp"))
+ "slm-simple-either")
+
+(define_insn_reservation "slm_ssecomi" 1
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssecomi"))
+ "slm-simple-0")
+
+;; no memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "slm_ssecvt" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "register_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand")))))
+ "slm-fp-0, slm-feu-0, nothing*3")
+
+;; memory and cvtpi2ps, cvtps2pi, cvttps2pi
+(define_insn_reservation "slm_ssecvt_mem" 5
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "memory_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand")))))
+"slm-fp-0, slm-feu-0, nothing*3")
+
+;; cvtpd2pi, cvtpi2pd
+(define_insn_reservation "slm_ssecvt_1" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V2SI 1 "register_operand"))
+ (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V2DF 1 "register_operand")))))
+ "slm-fp-0, slm-feu-0")
+
+;; memory and cvtpd2pi, cvtpi2pd
+(define_insn_reservation "slm_ssecvt_1_mem" 2
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (ior (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:V2SI 1 "memory_operand"))
+ (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V2DF 1 "memory_operand")))))
+ "slm-fp-0, slm-feu-0")
+
+;; otherwise. 4 cycles average for cvtss2sd
+(define_insn_reservation "slm_ssecvt_3" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "ssecvt")
+ (not (ior (and (match_operand:V2SI 0 "register_operand")
+ (match_operand:V4SF 1 "nonimmediate_operand"))
+ (and (match_operand:V4SF 0 "register_operand")
+ (match_operand:V2SI 1 "nonimmediate_operand"))))))
+ "slm-fp-0, nothing*3")
+
+;; memory and cvtsi2sd
+(define_insn_reservation "slm_sseicvt" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseicvt")
+ (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "nonimmediate_operand"))))
+ "slm-fp-0")
+
+;; otherwise. 8 cycles average for cvtsd2si
+(define_insn_reservation "slm_sseicvt_2" 4
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "sseicvt")
+ (not (and (match_operand:V2DF 0 "register_operand")
+ (match_operand:SI 1 "memory_operand")))))
+ "slm-fp-0, nothing*3")
+
+(define_insn_reservation "slm_ssediv" 13
+ (and (eq_attr "cpu" "slm")
+ (eq_attr "type" "ssediv"))
+ "slm-fp-0, slm-feu-0*10, nothing*2")
+
+;; simple for fmov
+(define_insn_reservation "slm_fmov" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "none")))
+ "slm-simple-either")
+
+;; simple for fmov
+(define_insn_reservation "slm_fmov_mem" 1
+ (and (eq_attr "cpu" "slm")
+ (and (eq_attr "type" "fmov")
+ (eq_attr "memory" "!none")))
+ "slm-simple-either")
+
+;; Define bypass here
+
+;; There will be 0 cycle stall from cmp/test to jcc
+
+;; There will be 1 cycle stall from flag producer to cmov and adc/sbb
+(define_bypass 2 "slm_icmp, slm_test, slm_alu, slm_alu_carry,
+ slm_alu1, slm_negnot, slm_incdec, slm_ishift,
+ slm_ishift1, slm_rotate, slm_rotate1"
+ "slm_icmov, slm_alu_carry")
+
+;; lea to shift source stall is 1 cycle
+(define_bypass 2 "slm_lea"
+ "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1"
+ "!ix86_dep_by_shift_count")
+
+;; non-lea to shift count stall is 1 cycle
+(define_bypass 2 "slm_alu_carry,
+ slm_alu,slm_alu1,slm_negnot,slm_imov,slm_imovx,
+ slm_incdec,slm_ishift,slm_ishift1,slm_rotate,
+ slm_rotate1, slm_setcc, slm_icmov, slm_pop,
+ slm_alu_mem, slm_alu_carry_mem, slm_alu1_mem,
+ slm_imovx_mem, slm_imovx_2_mem,
+ slm_imov_mem, slm_icmov_mem, slm_fmov_mem"
+ "slm_ishift, slm_ishift1, slm_rotate, slm_rotate1,
+ slm_ishift_mem, slm_ishift1_mem,
+ slm_rotate_mem, slm_rotate1_mem"
+ "ix86_dep_by_shift_count")
diff --git a/gcc/config/i386/smmintrin.h b/gcc/config/i386/smmintrin.h
index 3ae916ce5d3..20fa2ca2f94 100644
--- a/gcc/config/i386/smmintrin.h
+++ b/gcc/config/i386/smmintrin.h
@@ -27,14 +27,16 @@
#ifndef _SMMINTRIN_H_INCLUDED
#define _SMMINTRIN_H_INCLUDED
-#ifndef __SSE4_1__
-# error "SSE4.1 instruction set not enabled"
-#else
-
/* We need definitions from the SSSE3, SSE3, SSE2 and SSE header
files. */
#include <tmmintrin.h>
+#ifndef __SSE4_1__
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#define __DISABLE_SSE4_1__
+#endif /* __SSE4_1__ */
+
/* Rounding mode macros. */
#define _MM_FROUND_TO_NEAREST_INT 0x00
#define _MM_FROUND_TO_NEG_INF 0x01
@@ -582,7 +584,11 @@ _mm_stream_load_si128 (__m128i *__X)
return (__m128i) __builtin_ia32_movntdqa ((__v2di *) __X);
}
-#ifdef __SSE4_2__
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_2__ */
/* These macros specify the source data format. */
#define _SIDD_UBYTE_OPS 0x00
@@ -792,9 +798,29 @@ _mm_cmpgt_epi64 (__m128i __X, __m128i __Y)
return (__m128i) __builtin_ia32_pcmpgtq ((__v2di)__X, (__v2di)__Y);
}
-#ifdef __POPCNT__
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
+
+#ifdef __DISABLE_SSE4_1__
+#undef __DISABLE_SSE4_1__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_1__ */
+
#include <popcntintrin.h>
-#endif
+
+#ifndef __SSE4_1__
+#pragma GCC push_options
+#pragma GCC target("sse4.1")
+#define __DISABLE_SSE4_1__
+#endif /* __SSE4_1__ */
+
+#ifndef __SSE4_2__
+#pragma GCC push_options
+#pragma GCC target("sse4.2")
+#define __DISABLE_SSE4_2__
+#endif /* __SSE4_1__ */
/* Accumulate CRC32 (polynomial 0x11EDC6F41) value. */
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -823,8 +849,14 @@ _mm_crc32_u64 (unsigned long long __C, unsigned long long __V)
}
#endif
-#endif /* __SSE4_2__ */
+#ifdef __DISABLE_SSE4_2__
+#undef __DISABLE_SSE4_2__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_2__ */
-#endif /* __SSE4_1__ */
+#ifdef __DISABLE_SSE4_1__
+#undef __DISABLE_SSE4_1__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE4_1__ */
#endif /* _SMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/t-cygming b/gcc/config/i386/t-cygming
index f615ad7a2af..ba076a7f49a 100644
--- a/gcc/config/i386/t-cygming
+++ b/gcc/config/i386/t-cygming
@@ -22,7 +22,7 @@ LIMITS_H_TEST = true
winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
- $(TM_P_H) $(HASHTAB_H) $(GGC_H) $(LTO_STREAMER_H)
+ $(TM_P_H) $(HASH_TABLE_H) $(GGC_H) $(LTO_STREAMER_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/i386/winnt.c
diff --git a/gcc/config/i386/t-interix b/gcc/config/i386/t-interix
index 43443e72a45..4d7b5987037 100644
--- a/gcc/config/i386/t-interix
+++ b/gcc/config/i386/t-interix
@@ -18,7 +18,7 @@
winnt.o: $(srcdir)/config/i386/winnt.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
$(TM_H) $(RTL_H) $(REGS_H) hard-reg-set.h output.h $(TREE_H) flags.h \
- $(TM_P_H) $(HASHTAB_H) $(GGC_H)
+ $(TM_P_H) $(HASH_TABLE_H) $(GGC_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) \
$(srcdir)/config/i386/winnt.c
diff --git a/gcc/config/i386/tbmintrin.h b/gcc/config/i386/tbmintrin.h
index 07c4f77fdd6..9235d6c713d 100644
--- a/gcc/config/i386/tbmintrin.h
+++ b/gcc/config/i386/tbmintrin.h
@@ -25,13 +25,15 @@
# error "Never use <tbmintrin.h> directly; include <x86intrin.h> instead."
#endif
-#ifndef __TBM__
-# error "TBM instruction set not enabled"
-#endif /* __TBM__ */
-
#ifndef _TBMINTRIN_H_INCLUDED
#define _TBMINTRIN_H_INCLUDED
+#ifndef __TBM__
+#pragma GCC push_options
+#pragma GCC target("tbm")
+#define __DISABLE_TBM__
+#endif /* __TBM__ */
+
#ifdef __OPTIMIZE__
extern __inline unsigned int __attribute__((__gnu_inline__, __always_inline__, __artificial__))
__bextri_u32 (unsigned int __X, const unsigned int __I)
@@ -169,4 +171,10 @@ __tzmsk_u64 (unsigned long long __X)
#endif /* __x86_64__ */
+
+#ifdef __DISABLE_TBM__
+#undef __DISABLE_TBM__
+#pragma GCC pop_options
+#endif /* __DISABLE_TBM__ */
+
#endif /* _TBMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/tmmintrin.h b/gcc/config/i386/tmmintrin.h
index 767b199d3c4..3f63b4f8934 100644
--- a/gcc/config/i386/tmmintrin.h
+++ b/gcc/config/i386/tmmintrin.h
@@ -27,13 +27,15 @@
#ifndef _TMMINTRIN_H_INCLUDED
#define _TMMINTRIN_H_INCLUDED
-#ifndef __SSSE3__
-# error "SSSE3 instruction set not enabled"
-#else
-
/* We need definitions from the SSE3, SSE2 and SSE header files*/
#include <pmmintrin.h>
+#ifndef __SSSE3__
+#pragma GCC push_options
+#pragma GCC target("ssse3")
+#define __DISABLE_SSSE3__
+#endif /* __SSSE3__ */
+
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_hadd_epi16 (__m128i __X, __m128i __Y)
{
@@ -239,6 +241,9 @@ _mm_abs_pi32 (__m64 __X)
return (__m64) __builtin_ia32_pabsd ((__v2si)__X);
}
-#endif /* __SSSE3__ */
+#ifdef __DISABLE_SSSE3__
+#undef __DISABLE_SSSE3__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSSE3__ */
#endif /* _TMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index f0f972c56d9..c9e3aa98a37 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -30,7 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "flags.h"
#include "tm_p.h"
#include "diagnostic-core.h"
-#include "hashtab.h"
+#include "hash-table.h"
#include "langhooks.h"
#include "ggc.h"
#include "target.h"
@@ -449,7 +449,7 @@ i386_pe_reloc_rw_mask (void)
unsigned int
i386_pe_section_type_flags (tree decl, const char *name, int reloc)
{
- static htab_t htab;
+ static hash_table <pointer_hash <unsigned int> > htab;
unsigned int flags;
unsigned int **slot;
@@ -460,8 +460,8 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc)
/* The names we put in the hashtable will always be the unique
versions given to us by the stringtable, so we can just use
their addresses as the keys. */
- if (!htab)
- htab = htab_create (31, htab_hash_pointer, htab_eq_pointer, NULL);
+ if (!htab.is_created ())
+ htab.create (31);
if (decl && TREE_CODE (decl) == FUNCTION_DECL)
flags = SECTION_CODE;
@@ -480,7 +480,7 @@ i386_pe_section_type_flags (tree decl, const char *name, int reloc)
flags |= SECTION_LINKONCE;
/* See if we already have an entry for this section. */
- slot = (unsigned int **) htab_find_slot (htab, name, INSERT);
+ slot = htab.find_slot ((unsigned int *)name, INSERT);
if (!*slot)
{
*slot = (unsigned int *) xmalloc (sizeof (unsigned int));
@@ -714,12 +714,29 @@ i386_pe_record_stub (const char *name)
#ifdef CXX_WRAP_SPEC_LIST
+/* Hashtable helpers. */
+
+struct wrapped_symbol_hasher : typed_noop_remove <char>
+{
+ typedef char value_type;
+ typedef char compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+ static inline void remove (value_type *);
+};
+
+inline hashval_t
+wrapped_symbol_hasher::hash (const value_type *v)
+{
+ return htab_hash_string (v);
+}
+
/* Hash table equality helper function. */
-static int
-wrapper_strcmp (const void *x, const void *y)
+inline bool
+wrapped_symbol_hasher::equal (const value_type *x, const compare_type *y)
{
- return !strcmp ((const char *) x, (const char *) y);
+ return !strcmp (x, y);
}
/* Search for a function named TARGET in the list of library wrappers
@@ -733,7 +750,7 @@ static const char *
i386_find_on_wrapper_list (const char *target)
{
static char first_time = 1;
- static htab_t wrappers;
+ static hash_table <wrapped_symbol_hasher> wrappers;
if (first_time)
{
@@ -746,8 +763,7 @@ i386_find_on_wrapper_list (const char *target)
char *bufptr;
/* Break up the char array into separate strings
 and enter them into the hash table. */
- wrappers = htab_create_alloc (8, htab_hash_string, wrapper_strcmp,
- 0, xcalloc, free);
+ wrappers.create (8);
for (bufptr = wrapper_list_buffer; *bufptr; ++bufptr)
{
char *found = NULL;
@@ -760,12 +776,12 @@ i386_find_on_wrapper_list (const char *target)
if (*bufptr)
*bufptr = 0;
if (found)
- *htab_find_slot (wrappers, found, INSERT) = found;
+ *wrappers.find_slot (found, INSERT) = found;
}
first_time = 0;
}
- return (const char *) htab_find (wrappers, target);
+ return wrappers.find (target);
}
#endif /* CXX_WRAP_SPEC_LIST */
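The winnt.c change above is one instance of the htab_t-to-hash_table conversion: C-style htab_create/htab_find_slot calls with function-pointer callbacks become the typed hash_table template, whose hasher struct supplies hash and equality inline. Below is a minimal self-contained sketch of the idiom, assuming only the hash-table.h interface this patch itself uses (is_created, create, find_slot); the struct and function names are illustrative, not part of the patch:

    #include "hash-table.h"   /* after the usual config.h/system.h/coretypes.h includes */

    /* Hasher for plain C strings; typed_noop_remove means entries are not freed.  */
    struct string_hasher : typed_noop_remove <char>
    {
      typedef char value_type;
      typedef char compare_type;
      static inline hashval_t hash (const char *s) { return htab_hash_string (s); }
      static inline bool equal (const char *a, const char *b) { return !strcmp (a, b); }
    };

    static hash_table <string_hasher> seen;

    /* Record NAME; return true the first time it is seen.  */
    static bool
    record_name (char *name)
    {
      if (!seen.is_created ())
        seen.create (13);                       /* initial size hint */
      char **slot = seen.find_slot (name, INSERT);
      if (*slot)
        return false;                           /* already present */
      *slot = name;
      return true;
    }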
diff --git a/gcc/config/i386/wmmintrin.h b/gcc/config/i386/wmmintrin.h
index 93c24f41ce6..defcfd82acc 100644
--- a/gcc/config/i386/wmmintrin.h
+++ b/gcc/config/i386/wmmintrin.h
@@ -30,13 +30,14 @@
/* We need definitions from the SSE2 header file. */
#include <emmintrin.h>
-#if !defined (__AES__) && !defined (__PCLMUL__)
-# error "AES/PCLMUL instructions not enabled"
-#else
-
/* AES */
-#ifdef __AES__
+#ifndef __AES__
+#pragma GCC push_options
+#pragma GCC target("aes")
+#define __DISABLE_AES__
+#endif /* __AES__ */
+
/* Performs 1 round of AES decryption of the first m128i using
the second m128i as a round key. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
@@ -92,11 +93,20 @@ _mm_aeskeygenassist_si128 (__m128i __X, const int __C)
((__m128i) __builtin_ia32_aeskeygenassist128 ((__v2di)(__m128i)(X), \
(int)(C)))
#endif
-#endif /* __AES__ */
+
+#ifdef __DISABLE_AES__
+#undef __DISABLE_AES__
+#pragma GCC pop_options
+#endif /* __DISABLE_AES__ */
/* PCLMUL */
-#ifdef __PCLMUL__
+#ifndef __PCLMUL__
+#pragma GCC push_options
+#pragma GCC target("pclmul")
+#define __DISABLE_PCLMUL__
+#endif /* __PCLMUL__ */
+
/* Performs carry-less integer multiplication of 64-bit halves of
 128-bit input operands. The third parameter indicates which 64-bit
 halves of the input parameters v1 and v2 should be used. It must be
@@ -113,8 +123,10 @@ _mm_clmulepi64_si128 (__m128i __X, __m128i __Y, const int __I)
((__m128i) __builtin_ia32_pclmulqdq128 ((__v2di)(__m128i)(X), \
(__v2di)(__m128i)(Y), (int)(I)))
#endif
-#endif /* __PCLMUL__ */
-#endif /* __AES__/__PCLMUL__ */
+#ifdef __DISABLE_PCLMUL__
+#undef __DISABLE_PCLMUL__
+#pragma GCC pop_options
+#endif /* __DISABLE_PCLMUL__ */
#endif /* _WMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/x86intrin.h b/gcc/config/i386/x86intrin.h
index 5bf29d5d361..46ced969a9f 100644
--- a/gcc/config/i386/x86intrin.h
+++ b/gcc/config/i386/x86intrin.h
@@ -26,96 +26,52 @@
#include <ia32intrin.h>
-#ifdef __MMX__
#include <mmintrin.h>
-#endif
-#ifdef __SSE__
#include <xmmintrin.h>
-#endif
-#ifdef __SSE2__
#include <emmintrin.h>
-#endif
-#ifdef __SSE3__
#include <pmmintrin.h>
-#endif
-#ifdef __SSSE3__
#include <tmmintrin.h>
-#endif
-#ifdef __SSE4A__
#include <ammintrin.h>
-#endif
-#if defined (__SSE4_2__) || defined (__SSE4_1__)
#include <smmintrin.h>
-#endif
-#if defined (__AES__) || defined (__PCLMUL__)
#include <wmmintrin.h>
-#endif
/* For including AVX instructions */
#include <immintrin.h>
-#ifdef __3dNOW__
#include <mm3dnow.h>
-#endif
-#ifdef __FMA4__
#include <fma4intrin.h>
-#endif
-#ifdef __XOP__
#include <xopintrin.h>
-#endif
-#ifdef __LWP__
#include <lwpintrin.h>
-#endif
-#ifdef __BMI__
#include <bmiintrin.h>
-#endif
-#ifdef __BMI2__
#include <bmi2intrin.h>
-#endif
-#ifdef __TBM__
#include <tbmintrin.h>
-#endif
-#ifdef __LZCNT__
#include <lzcntintrin.h>
-#endif
-#ifdef __POPCNT__
#include <popcntintrin.h>
-#endif
-#ifdef __RDSEED__
#include <rdseedintrin.h>
-#endif
-#ifdef __PRFCHW__
#include <prfchwintrin.h>
-#endif
-#ifdef __FXSR__
#include <fxsrintrin.h>
-#endif
-#ifdef __XSAVE__
#include <xsaveintrin.h>
-#endif
-#ifdef __XSAVEOPT__
#include <xsaveoptintrin.h>
-#endif
#include <adxintrin.h>
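With every header above now included unconditionally (the per-feature #ifdef guards having moved into the headers themselves as target pragmas), a translation unit can pull in <x86intrin.h> without any -m flags and still provide feature-specific code paths through the function-level target attribute. A hedged usage sketch built on an intrinsic visible in this patch (_mm_hadd_epi16 from tmmintrin.h); the function name is illustrative:

    #include <x86intrin.h>

    /* SSSE3 path: compiled with SSSE3 enabled even if the file is built
       without -mssse3, thanks to the target attribute.  */
    __attribute__((target("ssse3")))
    static __m128i
    hadd16 (__m128i a, __m128i b)
    {
      return _mm_hadd_epi16 (a, b);
    }

    /* A real program would choose between this and a generic fallback at
       run time, e.g. with __builtin_cpu_supports ("ssse3").  */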
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index a223562490e..14d1e7fe2b0 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -27,16 +27,18 @@
#ifndef _XMMINTRIN_H_INCLUDED
#define _XMMINTRIN_H_INCLUDED
-#ifndef __SSE__
-# error "SSE instruction set not enabled"
-#else
-
/* We need type definitions from the MMX header file. */
#include <mmintrin.h>
/* Get _mm_malloc () and _mm_free (). */
#include <mm_malloc.h>
+#ifndef __SSE__
+#pragma GCC push_options
+#pragma GCC target("sse")
+#define __DISABLE_SSE__
+#endif /* __SSE__ */
+
/* The Intel API is flexible enough that we must allow aliasing with other
vector types, and their scalar components. */
typedef float __m128 __attribute__ ((__vector_size__ (16), __may_alias__));
@@ -1242,9 +1244,11 @@ do { \
} while (0)
/* For backward source compatibility. */
-#ifdef __SSE2__
# include <emmintrin.h>
-#endif
-#endif /* __SSE__ */
+#ifdef __DISABLE_SSE__
+#undef __DISABLE_SSE__
+#pragma GCC pop_options
+#endif /* __DISABLE_SSE__ */
+
#endif /* _XMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xopintrin.h b/gcc/config/i386/xopintrin.h
index 66b0f0de5c9..e0d148a0818 100644
--- a/gcc/config/i386/xopintrin.h
+++ b/gcc/config/i386/xopintrin.h
@@ -28,12 +28,14 @@
#ifndef _XOPMMINTRIN_H_INCLUDED
#define _XOPMMINTRIN_H_INCLUDED
-#ifndef __XOP__
-# error "XOP instruction set not enabled"
-#else
-
#include <fma4intrin.h>
+#ifndef __XOP__
+#pragma GCC push_options
+#pragma GCC target("xop")
+#define __DISABLE_XOP__
+#endif /* __XOP__ */
+
/* Integer multiply/add instructions. */
extern __inline __m128i __attribute__((__gnu_inline__, __always_inline__, __artificial__))
_mm_maccs_epi16(__m128i __A, __m128i __B, __m128i __C)
@@ -830,6 +832,9 @@ _mm256_permute2_ps (__m256 __X, __m256 __Y, __m256i __C, const int __I)
(int)(I)))
#endif /* __OPTIMIZE__ */
-#endif /* __XOP__ */
+#ifdef __DISABLE_XOP__
+#undef __DISABLE_XOP__
+#pragma GCC pop_options
+#endif /* __DISABLE_XOP__ */
#endif /* _XOPMMINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xsaveintrin.h b/gcc/config/i386/xsaveintrin.h
index f5665894084..31c17b1d2c5 100644
--- a/gcc/config/i386/xsaveintrin.h
+++ b/gcc/config/i386/xsaveintrin.h
@@ -28,6 +28,12 @@
#ifndef _XSAVEINTRIN_H_INCLUDED
#define _XSAVEINTRIN_H_INCLUDED
+#ifndef __XSAVE__
+#pragma GCC push_options
+#pragma GCC target("xsave")
+#define __DISABLE_XSAVE__
+#endif /* __XSAVE__ */
+
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsave (void *__P, long long __M)
@@ -58,4 +64,9 @@ _xrstor64 (void *__P, long long __M)
}
#endif
+#ifdef __DISABLE_XSAVE__
+#undef __DISABLE_XSAVE__
+#pragma GCC pop_options
+#endif /* __DISABLE_XSAVE__ */
+
#endif /* _XSAVEINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xsaveoptintrin.h b/gcc/config/i386/xsaveoptintrin.h
index 0d73e341f3f..aa9538da33e 100644
--- a/gcc/config/i386/xsaveoptintrin.h
+++ b/gcc/config/i386/xsaveoptintrin.h
@@ -28,6 +28,12 @@
#ifndef _XSAVEOPTINTRIN_H_INCLUDED
#define _XSAVEOPTINTRIN_H_INCLUDED
+#ifndef __XSAVEOPT__
+#pragma GCC push_options
+#pragma GCC target("xsaveopt")
+#define __DISABLE_XSAVEOPT__
+#endif /* __XSAVEOPT__ */
+
extern __inline void
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
_xsaveopt (void *__P, long long __M)
@@ -44,4 +50,9 @@ _xsaveopt64 (void *__P, long long __M)
}
#endif
+#ifdef __DISABLE_XSAVEOPT__
+#undef __DISABLE_XSAVEOPT__
+#pragma GCC pop_options
+#endif /* __DISABLE_XSAVEOPT__ */
+
#endif /* _XSAVEOPTINTRIN_H_INCLUDED */
diff --git a/gcc/config/i386/xtestintrin.h b/gcc/config/i386/xtestintrin.h
index c82fb7a61ae..a6afa896b4f 100644
--- a/gcc/config/i386/xtestintrin.h
+++ b/gcc/config/i386/xtestintrin.h
@@ -25,13 +25,15 @@
# error "Never use <xtestintrin.h> directly; include <immintrin.h> instead."
#endif
-#ifndef __RTM__
-# error "RTM instruction set not enabled"
-#endif /* __RTM__ */
-
#ifndef _XTESTINTRIN_H_INCLUDED
#define _XTESTINTRIN_H_INCLUDED
+#ifndef __RTM__
+#pragma GCC push_options
+#pragma GCC target("rtm")
+#define __DISABLE_RTM__
+#endif /* __RTM__ */
+
/* Return non-zero if the instruction executes inside an RTM or HLE code
region. Return zero otherwise. */
extern __inline int
@@ -41,4 +43,9 @@ _xtest (void)
return __builtin_ia32_xtest ();
}
+#ifdef __DISABLE_RTM__
+#undef __DISABLE_RTM__
+#pragma GCC pop_options
+#endif /* __DISABLE_RTM__ */
+
#endif /* _XTESTINTRIN_H_INCLUDED */
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 144cf7ee5ee..a128b19c7ca 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -47,7 +47,7 @@ along with GCC; see the file COPYING3. If not see
#include "target-def.h"
#include "common/common-target.h"
#include "tm_p.h"
-#include "hashtab.h"
+#include "hash-table.h"
#include "langhooks.h"
#include "gimple.h"
#include "intl.h"
@@ -170,7 +170,7 @@ static ds_t ia64_get_insn_spec_ds (rtx);
static ds_t ia64_get_insn_checked_ds (rtx);
static bool ia64_skip_rtx_p (const_rtx);
static int ia64_speculate_insn (rtx, ds_t, rtx *);
-static bool ia64_needs_block_p (int);
+static bool ia64_needs_block_p (ds_t);
static rtx ia64_gen_spec_check (rtx, rtx, ds_t);
static int ia64_spec_check_p (rtx);
static int ia64_spec_check_src_p (rtx);
@@ -257,8 +257,6 @@ static struct bundle_state *get_free_bundle_state (void);
static void free_bundle_state (struct bundle_state *);
static void initiate_bundle_states (void);
static void finish_bundle_states (void);
-static unsigned bundle_state_hash (const void *);
-static int bundle_state_eq_p (const void *, const void *);
static int insert_bundle_state (struct bundle_state *);
static void initiate_bundle_state_table (void);
static void finish_bundle_state_table (void);
@@ -8341,9 +8339,7 @@ ia64_needs_block_p (ds_t ts)
return !(mflag_sched_spec_control_ldc && mflag_sched_spec_ldc);
}
-/* Generate (or regenerate, if (MUTATE_P)) recovery check for INSN.
- If (LABEL != 0 || MUTATE_P), generate branchy recovery check.
- Otherwise, generate a simple check. */
+/* Generate (or regenerate) a recovery check for INSN. */
static rtx
ia64_gen_spec_check (rtx insn, rtx label, ds_t ds)
{
@@ -8528,18 +8524,21 @@ finish_bundle_states (void)
}
}
-/* Hash table of the bundle states. The key is dfa_state and insn_num
- of the bundle states. */
+/* Hashtable helpers. */
-static htab_t bundle_state_table;
+struct bundle_state_hasher : typed_noop_remove <bundle_state>
+{
+ typedef bundle_state value_type;
+ typedef bundle_state compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+};
/* The function returns hash of BUNDLE_STATE. */
-static unsigned
-bundle_state_hash (const void *bundle_state)
+inline hashval_t
+bundle_state_hasher::hash (const value_type *state)
{
- const struct bundle_state *const state
- = (const struct bundle_state *) bundle_state;
unsigned result, i;
for (result = i = 0; i < dfa_state_size; i++)
@@ -8550,19 +8549,20 @@ bundle_state_hash (const void *bundle_state)
/* The function returns nonzero if the bundle state keys are equal. */
-static int
-bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
+inline bool
+bundle_state_hasher::equal (const value_type *state1,
+ const compare_type *state2)
{
- const struct bundle_state *const state1
- = (const struct bundle_state *) bundle_state_1;
- const struct bundle_state *const state2
- = (const struct bundle_state *) bundle_state_2;
-
return (state1->insn_num == state2->insn_num
&& memcmp (state1->dfa_state, state2->dfa_state,
dfa_state_size) == 0);
}
+/* Hash table of the bundle states. The key is dfa_state and insn_num
+ of the bundle states. */
+
+static hash_table <bundle_state_hasher> bundle_state_table;
+
/* The function inserts the BUNDLE_STATE into the hash table. The
function returns nonzero if the bundle has been inserted into the
table. The table contains the best bundle state with given key. */
@@ -8570,39 +8570,35 @@ bundle_state_eq_p (const void *bundle_state_1, const void *bundle_state_2)
static int
insert_bundle_state (struct bundle_state *bundle_state)
{
- void **entry_ptr;
+ struct bundle_state **entry_ptr;
- entry_ptr = htab_find_slot (bundle_state_table, bundle_state, INSERT);
+ entry_ptr = bundle_state_table.find_slot (bundle_state, INSERT);
if (*entry_ptr == NULL)
{
bundle_state->next = index_to_bundle_states [bundle_state->insn_num];
index_to_bundle_states [bundle_state->insn_num] = bundle_state;
- *entry_ptr = (void *) bundle_state;
+ *entry_ptr = bundle_state;
return TRUE;
}
- else if (bundle_state->cost < ((struct bundle_state *) *entry_ptr)->cost
- || (bundle_state->cost == ((struct bundle_state *) *entry_ptr)->cost
- && (((struct bundle_state *)*entry_ptr)->accumulated_insns_num
+ else if (bundle_state->cost < (*entry_ptr)->cost
+ || (bundle_state->cost == (*entry_ptr)->cost
+ && ((*entry_ptr)->accumulated_insns_num
> bundle_state->accumulated_insns_num
- || (((struct bundle_state *)
- *entry_ptr)->accumulated_insns_num
+ || ((*entry_ptr)->accumulated_insns_num
== bundle_state->accumulated_insns_num
- && (((struct bundle_state *)
- *entry_ptr)->branch_deviation
+ && ((*entry_ptr)->branch_deviation
> bundle_state->branch_deviation
- || (((struct bundle_state *)
- *entry_ptr)->branch_deviation
+ || ((*entry_ptr)->branch_deviation
== bundle_state->branch_deviation
- && ((struct bundle_state *)
- *entry_ptr)->middle_bundle_stops
+ && (*entry_ptr)->middle_bundle_stops
> bundle_state->middle_bundle_stops))))))
{
struct bundle_state temp;
- temp = *(struct bundle_state *) *entry_ptr;
- *(struct bundle_state *) *entry_ptr = *bundle_state;
- ((struct bundle_state *) *entry_ptr)->next = temp.next;
+ temp = **entry_ptr;
+ **entry_ptr = *bundle_state;
+ (*entry_ptr)->next = temp.next;
*bundle_state = temp;
}
return FALSE;
@@ -8613,8 +8609,7 @@ insert_bundle_state (struct bundle_state *bundle_state)
static void
initiate_bundle_state_table (void)
{
- bundle_state_table = htab_create (50, bundle_state_hash, bundle_state_eq_p,
- (htab_del) 0);
+ bundle_state_table.create (50);
}
/* Finish work with the hash table. */
@@ -8622,7 +8617,7 @@ initiate_bundle_state_table (void)
static void
finish_bundle_state_table (void)
{
- htab_delete (bundle_state_table);
+ bundle_state_table.dispose ();
}
diff --git a/gcc/config/ia64/t-ia64 b/gcc/config/ia64/t-ia64
index 5c3ac644be3..b009cdf2bc5 100644
--- a/gcc/config/ia64/t-ia64
+++ b/gcc/config/ia64/t-ia64
@@ -24,4 +24,5 @@ ia64-c.o: $(srcdir)/config/ia64/ia64-c.c $(CONFIG_H) $(SYSTEM_H) \
# genattrtab generates very long string literals.
insn-attrtab.o-warn = -Wno-error
-ia64.o: debug.h $(PARAMS_H) sel-sched.h reload.h $(OPTS_H) dumpfile.h
+ia64.o: $(srcdir)/config/ia64/ia64.c debug.h $(PARAMS_H) sel-sched.h reload.h \
+ $(OPTS_H) dumpfile.h $(HASH_TABLE_H)
diff --git a/gcc/config/mips/constraints.md b/gcc/config/mips/constraints.md
index ddef8cc495d..1fe6119d075 100644
--- a/gcc/config/mips/constraints.md
+++ b/gcc/config/mips/constraints.md
@@ -92,6 +92,12 @@
;; but the DSP version allows any accumulator target.
(define_register_constraint "ka" "ISA_HAS_DSP_MULT ? ACC_REGS : MD_REGS")
+;; The register class to use for an allocatable division result.
+;; MIPS16 uses M16_REGS because LO is fixed.
+(define_register_constraint "kl"
+ "TARGET_MIPS16 ? M16_REGS : TARGET_BIG_ENDIAN ? MD1_REG : MD0_REG"
+ "@internal")
+
(define_constraint "kf"
"@internal"
(match_operand 0 "force_to_mem_operand"))
diff --git a/gcc/config/mips/mips-cpus.def b/gcc/config/mips/mips-cpus.def
index 9e5fd162189..a1c65915f78 100644
--- a/gcc/config/mips/mips-cpus.def
+++ b/gcc/config/mips/mips-cpus.def
@@ -43,7 +43,7 @@ MIPS_CPU ("mips4", PROCESSOR_R8000, 4, 0)
that to a recommendation to avoid the instructions in code that
isn't tuned to a specific processor. */
MIPS_CPU ("mips32", PROCESSOR_4KC, 32, PTF_AVOID_BRANCHLIKELY)
-MIPS_CPU ("mips32r2", PROCESSOR_M4K, 33, PTF_AVOID_BRANCHLIKELY)
+MIPS_CPU ("mips32r2", PROCESSOR_74KF2_1, 33, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("mips64", PROCESSOR_5KC, 64, PTF_AVOID_BRANCHLIKELY)
/* ??? For now just tune the generic MIPS64r2 for 5KC as well. */
MIPS_CPU ("mips64r2", PROCESSOR_5KC, 65, PTF_AVOID_BRANCHLIKELY)
@@ -68,6 +68,7 @@ MIPS_CPU ("r4600", PROCESSOR_R4600, 3, 0)
MIPS_CPU ("orion", PROCESSOR_R4600, 3, 0)
MIPS_CPU ("r4650", PROCESSOR_R4650, 3, 0)
MIPS_CPU ("r4700", PROCESSOR_R4700, 3, 0)
+MIPS_CPU ("r5900", PROCESSOR_R5900, 3, 0)
/* ST Loongson 2E/2F processors. */
MIPS_CPU ("loongson2e", PROCESSOR_LOONGSON_2E, 3, PTF_AVOID_BRANCHLIKELY)
MIPS_CPU ("loongson2f", PROCESSOR_LOONGSON_2F, 3, PTF_AVOID_BRANCHLIKELY)
@@ -94,6 +95,8 @@ MIPS_CPU ("4ksc", PROCESSOR_4KC, 32, 0)
MIPS_CPU ("m4k", PROCESSOR_M4K, 33, 0)
MIPS_CPU ("m14kc", PROCESSOR_M4K, 33, 0)
MIPS_CPU ("m14k", PROCESSOR_M4K, 33, 0)
+MIPS_CPU ("m14ke", PROCESSOR_M4K, 33, 0)
+MIPS_CPU ("m14kec", PROCESSOR_M4K, 33, 0)
MIPS_CPU ("4kec", PROCESSOR_4KC, 33, 0)
MIPS_CPU ("4kem", PROCESSOR_4KC, 33, 0)
MIPS_CPU ("4kep", PROCESSOR_4KP, 33, 0)
diff --git a/gcc/config/mips/mips-dsp.md b/gcc/config/mips/mips-dsp.md
index 002c9992001..49a08689638 100644
--- a/gcc/config/mips/mips-dsp.md
+++ b/gcc/config/mips/mips-dsp.md
@@ -1131,8 +1131,7 @@
"ISA_HAS_L<SHORT:SIZE><U>X"
"l<SHORT:size><u>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "<GPR:MODE>")
- (set_attr "length" "4")])
+ (set_attr "mode" "<GPR:MODE>")])
(define_expand "mips_lhx"
[(match_operand:SI 0 "register_operand")
@@ -1165,8 +1164,7 @@
"ISA_HAS_L<GPR:SIZE>X"
"l<GPR:size>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "<GPR:MODE>")
- (set_attr "length" "4")])
+ (set_attr "mode" "<GPR:MODE>")])
(define_insn "*mips_lw<u>x_<P:mode>_ext"
[(set (match_operand:DI 0 "register_operand" "=d")
@@ -1176,8 +1174,7 @@
"ISA_HAS_LW<U>X && TARGET_64BIT"
"lw<u>x\t%0,%2(%1)"
[(set_attr "type" "load")
- (set_attr "mode" "DI")
- (set_attr "length" "4")])
+ (set_attr "mode" "DI")])
;; Table 2-8. MIPS DSP ASE Instructions: Branch
;; BPOSGE32
diff --git a/gcc/config/mips/mips-ps-3d.md b/gcc/config/mips/mips-ps-3d.md
index 9c70cc4324f..a22c7829b77 100644
--- a/gcc/config/mips/mips-ps-3d.md
+++ b/gcc/config/mips/mips-ps-3d.md
@@ -481,7 +481,7 @@
operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
}
[(set_attr "type" "fcmp")
- (set_attr "length" "8")
+ (set_attr "insn_count" "2")
(set_attr "mode" "FPSW")])
(define_insn_and_split "mips_cabs_cond_4s"
@@ -510,7 +510,7 @@
operands[7] = simplify_gen_subreg (CCV2mode, operands[0], CCV4mode, 8);
}
[(set_attr "type" "fcmp")
- (set_attr "length" "8")
+ (set_attr "insn_count" "2")
(set_attr "mode" "FPSW")])
diff --git a/gcc/config/mips/mips-tables.opt b/gcc/config/mips/mips-tables.opt
index 0d7fa26510d..409356e1af7 100644
--- a/gcc/config/mips/mips-tables.opt
+++ b/gcc/config/mips/mips-tables.opt
@@ -208,425 +208,437 @@ EnumValue
Enum(mips_arch_opt_value) String(4700) Value(22)
EnumValue
-Enum(mips_arch_opt_value) String(loongson2e) Value(23) Canonical
+Enum(mips_arch_opt_value) String(r5900) Value(23) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(loongson2f) Value(24) Canonical
+Enum(mips_arch_opt_value) String(5900) Value(23)
EnumValue
-Enum(mips_arch_opt_value) String(r8000) Value(25) Canonical
+Enum(mips_arch_opt_value) String(loongson2e) Value(24) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r8k) Value(25)
+Enum(mips_arch_opt_value) String(loongson2f) Value(25) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(8000) Value(25)
+Enum(mips_arch_opt_value) String(r8000) Value(26) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(8k) Value(25)
+Enum(mips_arch_opt_value) String(r8k) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(r10000) Value(26) Canonical
+Enum(mips_arch_opt_value) String(8000) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(r10k) Value(26)
+Enum(mips_arch_opt_value) String(8k) Value(26)
EnumValue
-Enum(mips_arch_opt_value) String(10000) Value(26)
+Enum(mips_arch_opt_value) String(r10000) Value(27) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(10k) Value(26)
+Enum(mips_arch_opt_value) String(r10k) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(r12000) Value(27) Canonical
+Enum(mips_arch_opt_value) String(10000) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(r12k) Value(27)
+Enum(mips_arch_opt_value) String(10k) Value(27)
EnumValue
-Enum(mips_arch_opt_value) String(12000) Value(27)
+Enum(mips_arch_opt_value) String(r12000) Value(28) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(12k) Value(27)
+Enum(mips_arch_opt_value) String(r12k) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(r14000) Value(28) Canonical
+Enum(mips_arch_opt_value) String(12000) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(r14k) Value(28)
+Enum(mips_arch_opt_value) String(12k) Value(28)
EnumValue
-Enum(mips_arch_opt_value) String(14000) Value(28)
+Enum(mips_arch_opt_value) String(r14000) Value(29) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(14k) Value(28)
+Enum(mips_arch_opt_value) String(r14k) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(r16000) Value(29) Canonical
+Enum(mips_arch_opt_value) String(14000) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(r16k) Value(29)
+Enum(mips_arch_opt_value) String(14k) Value(29)
EnumValue
-Enum(mips_arch_opt_value) String(16000) Value(29)
+Enum(mips_arch_opt_value) String(r16000) Value(30) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(16k) Value(29)
+Enum(mips_arch_opt_value) String(r16k) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(vr5000) Value(30) Canonical
+Enum(mips_arch_opt_value) String(16000) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(vr5k) Value(30)
+Enum(mips_arch_opt_value) String(16k) Value(30)
EnumValue
-Enum(mips_arch_opt_value) String(5000) Value(30)
+Enum(mips_arch_opt_value) String(vr5000) Value(31) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(5k) Value(30)
+Enum(mips_arch_opt_value) String(vr5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5000) Value(30)
+Enum(mips_arch_opt_value) String(5000) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5k) Value(30)
+Enum(mips_arch_opt_value) String(5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(vr5400) Value(31) Canonical
+Enum(mips_arch_opt_value) String(r5000) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(5400) Value(31)
+Enum(mips_arch_opt_value) String(r5k) Value(31)
EnumValue
-Enum(mips_arch_opt_value) String(r5400) Value(31)
+Enum(mips_arch_opt_value) String(vr5400) Value(32) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(vr5500) Value(32) Canonical
+Enum(mips_arch_opt_value) String(5400) Value(32)
EnumValue
-Enum(mips_arch_opt_value) String(5500) Value(32)
+Enum(mips_arch_opt_value) String(r5400) Value(32)
EnumValue
-Enum(mips_arch_opt_value) String(r5500) Value(32)
+Enum(mips_arch_opt_value) String(vr5500) Value(33) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(rm7000) Value(33) Canonical
+Enum(mips_arch_opt_value) String(5500) Value(33)
EnumValue
-Enum(mips_arch_opt_value) String(rm7k) Value(33)
+Enum(mips_arch_opt_value) String(r5500) Value(33)
EnumValue
-Enum(mips_arch_opt_value) String(7000) Value(33)
+Enum(mips_arch_opt_value) String(rm7000) Value(34) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(7k) Value(33)
+Enum(mips_arch_opt_value) String(rm7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(r7000) Value(33)
+Enum(mips_arch_opt_value) String(7000) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(r7k) Value(33)
+Enum(mips_arch_opt_value) String(7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(rm9000) Value(34) Canonical
+Enum(mips_arch_opt_value) String(r7000) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(rm9k) Value(34)
+Enum(mips_arch_opt_value) String(r7k) Value(34)
EnumValue
-Enum(mips_arch_opt_value) String(9000) Value(34)
+Enum(mips_arch_opt_value) String(rm9000) Value(35) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(9k) Value(34)
+Enum(mips_arch_opt_value) String(rm9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r9000) Value(34)
+Enum(mips_arch_opt_value) String(9000) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r9k) Value(34)
+Enum(mips_arch_opt_value) String(9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(4kc) Value(35) Canonical
+Enum(mips_arch_opt_value) String(r9000) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(r4kc) Value(35)
+Enum(mips_arch_opt_value) String(r9k) Value(35)
EnumValue
-Enum(mips_arch_opt_value) String(4km) Value(36) Canonical
+Enum(mips_arch_opt_value) String(4kc) Value(36) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4km) Value(36)
+Enum(mips_arch_opt_value) String(r4kc) Value(36)
EnumValue
-Enum(mips_arch_opt_value) String(4kp) Value(37) Canonical
+Enum(mips_arch_opt_value) String(4km) Value(37) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kp) Value(37)
+Enum(mips_arch_opt_value) String(r4km) Value(37)
EnumValue
-Enum(mips_arch_opt_value) String(4ksc) Value(38) Canonical
+Enum(mips_arch_opt_value) String(4kp) Value(38) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4ksc) Value(38)
+Enum(mips_arch_opt_value) String(r4kp) Value(38)
EnumValue
-Enum(mips_arch_opt_value) String(m4k) Value(39) Canonical
+Enum(mips_arch_opt_value) String(4ksc) Value(39) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(m14kc) Value(40) Canonical
+Enum(mips_arch_opt_value) String(r4ksc) Value(39)
EnumValue
-Enum(mips_arch_opt_value) String(m14k) Value(41) Canonical
+Enum(mips_arch_opt_value) String(m4k) Value(40) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(4kec) Value(42) Canonical
+Enum(mips_arch_opt_value) String(m14kc) Value(41) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kec) Value(42)
+Enum(mips_arch_opt_value) String(m14k) Value(42) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(4kem) Value(43) Canonical
+Enum(mips_arch_opt_value) String(m14ke) Value(43) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kem) Value(43)
+Enum(mips_arch_opt_value) String(m14kec) Value(44) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(4kep) Value(44) Canonical
+Enum(mips_arch_opt_value) String(4kec) Value(45) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4kep) Value(44)
+Enum(mips_arch_opt_value) String(r4kec) Value(45)
EnumValue
-Enum(mips_arch_opt_value) String(4ksd) Value(45) Canonical
+Enum(mips_arch_opt_value) String(4kem) Value(46) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r4ksd) Value(45)
+Enum(mips_arch_opt_value) String(r4kem) Value(46)
EnumValue
-Enum(mips_arch_opt_value) String(24kc) Value(46) Canonical
+Enum(mips_arch_opt_value) String(4kep) Value(47) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kc) Value(46)
+Enum(mips_arch_opt_value) String(r4kep) Value(47)
EnumValue
-Enum(mips_arch_opt_value) String(24kf2_1) Value(47) Canonical
+Enum(mips_arch_opt_value) String(4ksd) Value(48) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf2_1) Value(47)
+Enum(mips_arch_opt_value) String(r4ksd) Value(48)
EnumValue
-Enum(mips_arch_opt_value) String(24kf) Value(48) Canonical
+Enum(mips_arch_opt_value) String(24kc) Value(49) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf) Value(48)
+Enum(mips_arch_opt_value) String(r24kc) Value(49)
EnumValue
-Enum(mips_arch_opt_value) String(24kf1_1) Value(49) Canonical
+Enum(mips_arch_opt_value) String(24kf2_1) Value(50) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kf1_1) Value(49)
+Enum(mips_arch_opt_value) String(r24kf2_1) Value(50)
EnumValue
-Enum(mips_arch_opt_value) String(24kfx) Value(50) Canonical
+Enum(mips_arch_opt_value) String(24kf) Value(51) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kfx) Value(50)
+Enum(mips_arch_opt_value) String(r24kf) Value(51)
EnumValue
-Enum(mips_arch_opt_value) String(24kx) Value(51) Canonical
+Enum(mips_arch_opt_value) String(24kf1_1) Value(52) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kx) Value(51)
+Enum(mips_arch_opt_value) String(r24kf1_1) Value(52)
EnumValue
-Enum(mips_arch_opt_value) String(24kec) Value(52) Canonical
+Enum(mips_arch_opt_value) String(24kfx) Value(53) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kec) Value(52)
+Enum(mips_arch_opt_value) String(r24kfx) Value(53)
EnumValue
-Enum(mips_arch_opt_value) String(24kef2_1) Value(53) Canonical
+Enum(mips_arch_opt_value) String(24kx) Value(54) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef2_1) Value(53)
+Enum(mips_arch_opt_value) String(r24kx) Value(54)
EnumValue
-Enum(mips_arch_opt_value) String(24kef) Value(54) Canonical
+Enum(mips_arch_opt_value) String(24kec) Value(55) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef) Value(54)
+Enum(mips_arch_opt_value) String(r24kec) Value(55)
EnumValue
-Enum(mips_arch_opt_value) String(24kef1_1) Value(55) Canonical
+Enum(mips_arch_opt_value) String(24kef2_1) Value(56) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kef1_1) Value(55)
+Enum(mips_arch_opt_value) String(r24kef2_1) Value(56)
EnumValue
-Enum(mips_arch_opt_value) String(24kefx) Value(56) Canonical
+Enum(mips_arch_opt_value) String(24kef) Value(57) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kefx) Value(56)
+Enum(mips_arch_opt_value) String(r24kef) Value(57)
EnumValue
-Enum(mips_arch_opt_value) String(24kex) Value(57) Canonical
+Enum(mips_arch_opt_value) String(24kef1_1) Value(58) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r24kex) Value(57)
+Enum(mips_arch_opt_value) String(r24kef1_1) Value(58)
EnumValue
-Enum(mips_arch_opt_value) String(34kc) Value(58) Canonical
+Enum(mips_arch_opt_value) String(24kefx) Value(59) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kc) Value(58)
+Enum(mips_arch_opt_value) String(r24kefx) Value(59)
EnumValue
-Enum(mips_arch_opt_value) String(34kf2_1) Value(59) Canonical
+Enum(mips_arch_opt_value) String(24kex) Value(60) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf2_1) Value(59)
+Enum(mips_arch_opt_value) String(r24kex) Value(60)
EnumValue
-Enum(mips_arch_opt_value) String(34kf) Value(60) Canonical
+Enum(mips_arch_opt_value) String(34kc) Value(61) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf) Value(60)
+Enum(mips_arch_opt_value) String(r34kc) Value(61)
EnumValue
-Enum(mips_arch_opt_value) String(34kf1_1) Value(61) Canonical
+Enum(mips_arch_opt_value) String(34kf2_1) Value(62) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kf1_1) Value(61)
+Enum(mips_arch_opt_value) String(r34kf2_1) Value(62)
EnumValue
-Enum(mips_arch_opt_value) String(34kfx) Value(62) Canonical
+Enum(mips_arch_opt_value) String(34kf) Value(63) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kfx) Value(62)
+Enum(mips_arch_opt_value) String(r34kf) Value(63)
EnumValue
-Enum(mips_arch_opt_value) String(34kx) Value(63) Canonical
+Enum(mips_arch_opt_value) String(34kf1_1) Value(64) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kx) Value(63)
+Enum(mips_arch_opt_value) String(r34kf1_1) Value(64)
EnumValue
-Enum(mips_arch_opt_value) String(34kn) Value(64) Canonical
+Enum(mips_arch_opt_value) String(34kfx) Value(65) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r34kn) Value(64)
+Enum(mips_arch_opt_value) String(r34kfx) Value(65)
EnumValue
-Enum(mips_arch_opt_value) String(74kc) Value(65) Canonical
+Enum(mips_arch_opt_value) String(34kx) Value(66) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kc) Value(65)
+Enum(mips_arch_opt_value) String(r34kx) Value(66)
EnumValue
-Enum(mips_arch_opt_value) String(74kf2_1) Value(66) Canonical
+Enum(mips_arch_opt_value) String(34kn) Value(67) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf2_1) Value(66)
+Enum(mips_arch_opt_value) String(r34kn) Value(67)
EnumValue
-Enum(mips_arch_opt_value) String(74kf) Value(67) Canonical
+Enum(mips_arch_opt_value) String(74kc) Value(68) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf) Value(67)
+Enum(mips_arch_opt_value) String(r74kc) Value(68)
EnumValue
-Enum(mips_arch_opt_value) String(74kf1_1) Value(68) Canonical
+Enum(mips_arch_opt_value) String(74kf2_1) Value(69) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf1_1) Value(68)
+Enum(mips_arch_opt_value) String(r74kf2_1) Value(69)
EnumValue
-Enum(mips_arch_opt_value) String(74kfx) Value(69) Canonical
+Enum(mips_arch_opt_value) String(74kf) Value(70) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kfx) Value(69)
+Enum(mips_arch_opt_value) String(r74kf) Value(70)
EnumValue
-Enum(mips_arch_opt_value) String(74kx) Value(70) Canonical
+Enum(mips_arch_opt_value) String(74kf1_1) Value(71) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kx) Value(70)
+Enum(mips_arch_opt_value) String(r74kf1_1) Value(71)
EnumValue
-Enum(mips_arch_opt_value) String(74kf3_2) Value(71) Canonical
+Enum(mips_arch_opt_value) String(74kfx) Value(72) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r74kf3_2) Value(71)
+Enum(mips_arch_opt_value) String(r74kfx) Value(72)
EnumValue
-Enum(mips_arch_opt_value) String(1004kc) Value(72) Canonical
+Enum(mips_arch_opt_value) String(74kx) Value(73) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kc) Value(72)
+Enum(mips_arch_opt_value) String(r74kx) Value(73)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf2_1) Value(73) Canonical
+Enum(mips_arch_opt_value) String(74kf3_2) Value(74) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf2_1) Value(73)
+Enum(mips_arch_opt_value) String(r74kf3_2) Value(74)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf) Value(74) Canonical
+Enum(mips_arch_opt_value) String(1004kc) Value(75) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf) Value(74)
+Enum(mips_arch_opt_value) String(r1004kc) Value(75)
EnumValue
-Enum(mips_arch_opt_value) String(1004kf1_1) Value(75) Canonical
+Enum(mips_arch_opt_value) String(1004kf2_1) Value(76) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r1004kf1_1) Value(75)
+Enum(mips_arch_opt_value) String(r1004kf2_1) Value(76)
EnumValue
-Enum(mips_arch_opt_value) String(5kc) Value(76) Canonical
+Enum(mips_arch_opt_value) String(1004kf) Value(77) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r5kc) Value(76)
+Enum(mips_arch_opt_value) String(r1004kf) Value(77)
EnumValue
-Enum(mips_arch_opt_value) String(5kf) Value(77) Canonical
+Enum(mips_arch_opt_value) String(1004kf1_1) Value(78) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r5kf) Value(77)
+Enum(mips_arch_opt_value) String(r1004kf1_1) Value(78)
EnumValue
-Enum(mips_arch_opt_value) String(20kc) Value(78) Canonical
+Enum(mips_arch_opt_value) String(5kc) Value(79) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(r20kc) Value(78)
+Enum(mips_arch_opt_value) String(r5kc) Value(79)
EnumValue
-Enum(mips_arch_opt_value) String(sb1) Value(79) Canonical
+Enum(mips_arch_opt_value) String(5kf) Value(80) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(sb1a) Value(80) Canonical
+Enum(mips_arch_opt_value) String(r5kf) Value(80)
EnumValue
-Enum(mips_arch_opt_value) String(sr71000) Value(81) Canonical
+Enum(mips_arch_opt_value) String(20kc) Value(81) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(sr71k) Value(81)
+Enum(mips_arch_opt_value) String(r20kc) Value(81)
EnumValue
-Enum(mips_arch_opt_value) String(xlr) Value(82) Canonical
+Enum(mips_arch_opt_value) String(sb1) Value(82) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(loongson3a) Value(83) Canonical
+Enum(mips_arch_opt_value) String(sb1a) Value(83) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(octeon) Value(84) Canonical
+Enum(mips_arch_opt_value) String(sr71000) Value(84) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(octeon+) Value(85) Canonical
+Enum(mips_arch_opt_value) String(sr71k) Value(84)
EnumValue
-Enum(mips_arch_opt_value) String(octeon2) Value(86) Canonical
+Enum(mips_arch_opt_value) String(xlr) Value(85) Canonical
EnumValue
-Enum(mips_arch_opt_value) String(xlp) Value(87) Canonical
+Enum(mips_arch_opt_value) String(loongson3a) Value(86) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(octeon) Value(87) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(octeon+) Value(88) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(octeon2) Value(89) Canonical
+
+EnumValue
+Enum(mips_arch_opt_value) String(xlp) Value(90) Canonical
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index 1f2774638fc..bd1d10b0e4e 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -43,7 +43,7 @@ along with GCC; see the file COPYING3. If not see
#include "tm_p.h"
#include "ggc.h"
#include "gstab.h"
-#include "hashtab.h"
+#include "hash-table.h"
#include "debug.h"
#include "target.h"
#include "target-def.h"
@@ -1029,6 +1029,19 @@ static const struct mips_rtx_cost_data
1, /* branch_cost */
4 /* memory_latency */
},
+ { /* R5900 */
+ COSTS_N_INSNS (4), /* fp_add */
+ COSTS_N_INSNS (4), /* fp_mult_sf */
+ COSTS_N_INSNS (256), /* fp_mult_df */
+ COSTS_N_INSNS (8), /* fp_div_sf */
+ COSTS_N_INSNS (256), /* fp_div_df */
+ COSTS_N_INSNS (4), /* int_mult_si */
+ COSTS_N_INSNS (256), /* int_mult_di */
+ COSTS_N_INSNS (37), /* int_div_si */
+ COSTS_N_INSNS (256), /* int_div_di */
+ 1, /* branch_cost */
+ 4 /* memory_latency */
+ },
{ /* R7000 */
/* The only costs changed here are those for
 integer multiplication. */
@@ -1426,6 +1439,16 @@ mips_merge_decl_attributes (tree olddecl, tree newdecl)
return merge_attributes (DECL_ATTRIBUTES (olddecl),
DECL_ATTRIBUTES (newdecl));
}
+
+/* Implement TARGET_CAN_INLINE_P. */
+
+static bool
+mips_can_inline_p (tree caller, tree callee)
+{
+ if (mips_get_compress_mode (callee) != mips_get_compress_mode (caller))
+ return false;
+ return default_target_can_inline_p (caller, callee);
+}
/* If X is a PLUS of a CONST_INT, return the two terms in *BASE_PTR
and *OFFSET_PTR. Return X in *BASE_PTR and 0 in *OFFSET_PTR otherwise. */
@@ -12440,7 +12463,10 @@ mips_start_ll_sc_sync_block (void)
if (!ISA_HAS_LL_SC)
{
output_asm_insn (".set\tpush", 0);
- output_asm_insn (".set\tmips2", 0);
+ if (TARGET_64BIT)
+ output_asm_insn (".set\tmips3", 0);
+ else
+ output_asm_insn (".set\tmips2", 0);
}
}
@@ -12995,6 +13021,7 @@ mips_issue_rate (void)
case PROCESSOR_R4130:
case PROCESSOR_R5400:
case PROCESSOR_R5500:
+ case PROCESSOR_R5900:
case PROCESSOR_R7000:
case PROCESSOR_R9000:
case PROCESSOR_OCTEON:
@@ -15796,30 +15823,43 @@ mips_hash_base (rtx base)
return hash_rtx (base, GET_MODE (base), &do_not_record_p, NULL, false);
}
+/* Hashtable helpers. */
+
+struct mips_lo_sum_offset_hasher : typed_free_remove <mips_lo_sum_offset>
+{
+ typedef mips_lo_sum_offset value_type;
+ typedef rtx_def compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+};
+
/* Hash-table callbacks for mips_lo_sum_offsets. */
-static hashval_t
-mips_lo_sum_offset_hash (const void *entry)
+inline hashval_t
+mips_lo_sum_offset_hasher::hash (const value_type *entry)
{
- return mips_hash_base (((const struct mips_lo_sum_offset *) entry)->base);
+ return mips_hash_base (entry->base);
}
-static int
-mips_lo_sum_offset_eq (const void *entry, const void *value)
+inline bool
+mips_lo_sum_offset_hasher::equal (const value_type *entry,
+ const compare_type *value)
{
- return rtx_equal_p (((const struct mips_lo_sum_offset *) entry)->base,
- (const_rtx) value);
+ return rtx_equal_p (entry->base, value);
}
+typedef hash_table <mips_lo_sum_offset_hasher> mips_offset_table;
+
/* Look up symbolic constant X in HTAB, which is a hash table of
mips_lo_sum_offsets. If OPTION is NO_INSERT, return true if X can be
paired with a recorded LO_SUM, otherwise record X in the table. */
static bool
-mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option)
+mips_lo_sum_offset_lookup (mips_offset_table htab, rtx x,
+ enum insert_option option)
{
rtx base, offset;
- void **slot;
+ mips_lo_sum_offset **slot;
struct mips_lo_sum_offset *entry;
/* Split X into a base and offset. */
@@ -15828,7 +15868,7 @@ mips_lo_sum_offset_lookup (htab_t htab, rtx x, enum insert_option option)
base = UNSPEC_ADDRESS (base);
/* Look up the base in the hash table. */
- slot = htab_find_slot_with_hash (htab, base, mips_hash_base (base), option);
+ slot = htab.find_slot_with_hash (base, mips_hash_base (base), option);
if (slot == NULL)
return false;
@@ -15858,7 +15898,8 @@ static int
mips_record_lo_sum (rtx *loc, void *data)
{
if (GET_CODE (*loc) == LO_SUM)
- mips_lo_sum_offset_lookup ((htab_t) data, XEXP (*loc, 1), INSERT);
+ mips_lo_sum_offset_lookup (*(mips_offset_table*) data,
+ XEXP (*loc, 1), INSERT);
return 0;
}
@@ -15867,7 +15908,7 @@ mips_record_lo_sum (rtx *loc, void *data)
LO_SUMs in the current function. */
static bool
-mips_orphaned_high_part_p (htab_t htab, rtx insn)
+mips_orphaned_high_part_p (mips_offset_table htab, rtx insn)
{
enum mips_symbol_type type;
rtx x, set;
@@ -15975,7 +16016,7 @@ mips_reorg_process_insns (void)
{
rtx insn, last_insn, subinsn, next_insn, lo_reg, delayed_reg;
int hilo_delay;
- htab_t htab;
+ mips_offset_table htab;
/* Force all instructions to be split into their final form. */
split_all_insns_noflow ();
@@ -16001,8 +16042,9 @@ mips_reorg_process_insns (void)
cfun->machine->all_noreorder_p = false;
/* Code compiled with -mfix-vr4120 or -mfix-24k can't be all noreorder
- because we rely on the assembler to work around some errata. */
- if (TARGET_FIX_VR4120 || TARGET_FIX_24K)
+ because we rely on the assembler to work around some errata.
+ The R5900 also has several errata. */
+ if (TARGET_FIX_VR4120 || TARGET_FIX_24K || TARGET_MIPS5900)
cfun->machine->all_noreorder_p = false;
/* The same is true for -mfix-vr4130 if we might generate MFLO or
@@ -16012,14 +16054,13 @@ mips_reorg_process_insns (void)
if (TARGET_FIX_VR4130 && !ISA_HAS_MACCHI)
cfun->machine->all_noreorder_p = false;
- htab = htab_create (37, mips_lo_sum_offset_hash,
- mips_lo_sum_offset_eq, free);
+ htab.create (37);
/* Make a first pass over the instructions, recording all the LO_SUMs. */
for (insn = get_insns (); insn != 0; insn = NEXT_INSN (insn))
FOR_EACH_SUBINSN (subinsn, insn)
if (USEFUL_INSN_P (subinsn))
- for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, htab);
+ for_each_rtx (&PATTERN (subinsn), mips_record_lo_sum, &htab);
last_insn = 0;
hilo_delay = 2;
@@ -16076,7 +16117,7 @@ mips_reorg_process_insns (void)
}
}
- htab_delete (htab);
+ htab.dispose ();
}
/* Return true if the function has a long branch instruction. */
@@ -18600,6 +18641,8 @@ mips_expand_vec_minmax (rtx target, rtx op0, rtx op1,
#define TARGET_INSERT_ATTRIBUTES mips_insert_attributes
#undef TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES mips_merge_decl_attributes
+#undef TARGET_CAN_INLINE_P
+#define TARGET_CAN_INLINE_P mips_can_inline_p
#undef TARGET_SET_CURRENT_FUNCTION
#define TARGET_SET_CURRENT_FUNCTION mips_set_current_function
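The new TARGET_CAN_INLINE_P hook defined above (mips_can_inline_p) refuses to inline across functions whose compression modes differ. A hedged illustration of the user-visible effect, using the standard MIPS mips16/nomips16 attributes; the function names are made up for the example:

    /* add16 is compiled as MIPS16 code ...  */
    static inline int __attribute__((mips16))
    add16 (int a, int b)
    {
      return a + b;
    }

    /* ... so with the new hook it presumably stays an out-of-line call here,
       since the caller's compression mode (nomips16) differs.  */
    int __attribute__((nomips16))
    caller (int x)
    {
      return add16 (x, 1);
    }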
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index 50a030f7f2c..d775a8c940b 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -222,6 +222,7 @@ struct mips_cpu_info {
#define TARGET_MIPS4130 (mips_arch == PROCESSOR_R4130)
#define TARGET_MIPS5400 (mips_arch == PROCESSOR_R5400)
#define TARGET_MIPS5500 (mips_arch == PROCESSOR_R5500)
+#define TARGET_MIPS5900 (mips_arch == PROCESSOR_R5900)
#define TARGET_MIPS7000 (mips_arch == PROCESSOR_R7000)
#define TARGET_MIPS9000 (mips_arch == PROCESSOR_R9000)
#define TARGET_OCTEON (mips_arch == PROCESSOR_OCTEON \
@@ -399,6 +400,9 @@ struct mips_cpu_info {
if (TARGET_MCU) \
builtin_define ("__mips_mcu"); \
\
+ if (TARGET_EVA) \
+ builtin_define ("__mips_eva"); \
+ \
if (TARGET_DSP) \
{ \
builtin_define ("__mips_dsp"); \
@@ -614,39 +618,25 @@ struct mips_cpu_info {
#endif
#ifndef MULTILIB_ISA_DEFAULT
-# if MIPS_ISA_DEFAULT == 1
-# define MULTILIB_ISA_DEFAULT "mips1"
-# else
-# if MIPS_ISA_DEFAULT == 2
-# define MULTILIB_ISA_DEFAULT "mips2"
-# else
-# if MIPS_ISA_DEFAULT == 3
-# define MULTILIB_ISA_DEFAULT "mips3"
-# else
-# if MIPS_ISA_DEFAULT == 4
-# define MULTILIB_ISA_DEFAULT "mips4"
-# else
-# if MIPS_ISA_DEFAULT == 32
-# define MULTILIB_ISA_DEFAULT "mips32"
-# else
-# if MIPS_ISA_DEFAULT == 33
-# define MULTILIB_ISA_DEFAULT "mips32r2"
-# else
-# if MIPS_ISA_DEFAULT == 64
-# define MULTILIB_ISA_DEFAULT "mips64"
-# else
-# if MIPS_ISA_DEFAULT == 65
-# define MULTILIB_ISA_DEFAULT "mips64r2"
-# else
-# define MULTILIB_ISA_DEFAULT "mips1"
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
-# endif
+#if MIPS_ISA_DEFAULT == 1
+#define MULTILIB_ISA_DEFAULT "mips1"
+#elif MIPS_ISA_DEFAULT == 2
+#define MULTILIB_ISA_DEFAULT "mips2"
+#elif MIPS_ISA_DEFAULT == 3
+#define MULTILIB_ISA_DEFAULT "mips3"
+#elif MIPS_ISA_DEFAULT == 4
+#define MULTILIB_ISA_DEFAULT "mips4"
+#elif MIPS_ISA_DEFAULT == 32
+#define MULTILIB_ISA_DEFAULT "mips32"
+#elif MIPS_ISA_DEFAULT == 33
+#define MULTILIB_ISA_DEFAULT "mips32r2"
+#elif MIPS_ISA_DEFAULT == 64
+#define MULTILIB_ISA_DEFAULT "mips64"
+#elif MIPS_ISA_DEFAULT == 65
+#define MULTILIB_ISA_DEFAULT "mips64r2"
+#else
+#define MULTILIB_ISA_DEFAULT "mips1"
+#endif
#endif
#ifndef MIPS_ABI_DEFAULT
@@ -657,21 +647,13 @@ struct mips_cpu_info {
#if MIPS_ABI_DEFAULT == ABI_32
#define MULTILIB_ABI_DEFAULT "mabi=32"
-#endif
-
-#if MIPS_ABI_DEFAULT == ABI_O64
+#elif MIPS_ABI_DEFAULT == ABI_O64
#define MULTILIB_ABI_DEFAULT "mabi=o64"
-#endif
-
-#if MIPS_ABI_DEFAULT == ABI_N32
+#elif MIPS_ABI_DEFAULT == ABI_N32
#define MULTILIB_ABI_DEFAULT "mabi=n32"
-#endif
-
-#if MIPS_ABI_DEFAULT == ABI_64
+#elif MIPS_ABI_DEFAULT == ABI_64
#define MULTILIB_ABI_DEFAULT "mabi=64"
-#endif
-
-#if MIPS_ABI_DEFAULT == ABI_EABI
+#elif MIPS_ABI_DEFAULT == ABI_EABI
#define MULTILIB_ABI_DEFAULT "mabi=eabi"
#endif
@@ -743,9 +725,9 @@ struct mips_cpu_info {
#define MIPS_ISA_SYNCI_SPEC \
"%{msynci|mno-synci:;:%{mips32r2|mips64r2:-msynci;:-mno-synci}}"
-#if MIPS_ABI_DEFAULT == ABI_O64 \
- || MIPS_ABI_DEFAULT == ABI_N32 \
- || MIPS_ABI_DEFAULT == ABI_64
+#if (MIPS_ABI_DEFAULT == ABI_O64 \
+ || MIPS_ABI_DEFAULT == ABI_N32 \
+ || MIPS_ABI_DEFAULT == ABI_64)
#define OPT_ARCH64 "mabi=32|mgp32:;"
#define OPT_ARCH32 "mabi=32|mgp32"
#else
@@ -781,7 +763,7 @@ struct mips_cpu_info {
#define BASE_DRIVER_SELF_SPECS \
"%{!mno-dsp: \
%{march=24ke*|march=34kc*|march=34kf*|march=34kx*|march=1004k*: -mdsp} \
- %{march=74k*:%{!mno-dspr2: -mdspr2 -mdsp}}}"
+ %{march=74k*|march=m14ke*: %{!mno-dspr2: -mdspr2 -mdsp}}}"
#define DRIVER_SELF_SPECS BASE_DRIVER_SELF_SPECS
@@ -825,6 +807,7 @@ struct mips_cpu_info {
#define ISA_HAS_MUL3 ((TARGET_MIPS3900 \
|| TARGET_MIPS5400 \
|| TARGET_MIPS5500 \
+ || TARGET_MIPS5900 \
|| TARGET_MIPS7000 \
|| TARGET_MIPS9000 \
|| TARGET_MAD \
@@ -839,6 +822,26 @@ struct mips_cpu_info {
&& TARGET_OCTEON \
&& !TARGET_MIPS16)
+/* ISA supports instructions DMULT and DMULTU. */
+#define ISA_HAS_DMULT (TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions MULT and MULTU.
+ This is always true, but the macro is needed for ISA_HAS_<D>MULT
+ in mips.md. */
+#define ISA_HAS_MULT (1)
+
+/* ISA supports instructions DDIV and DDIVU. */
+#define ISA_HAS_DDIV (TARGET_64BIT && !TARGET_MIPS5900)
+
+/* ISA supports instructions DIV and DIVU.
+ This is always true, but the macro is needed for ISA_HAS_<D>DIV
+ in mips.md. */
+#define ISA_HAS_DIV (1)
+
+#define ISA_HAS_DIV3 ((TARGET_LOONGSON_2EF \
+ || TARGET_LOONGSON_3A) \
+ && !TARGET_MIPS16)
+
/* ISA has the floating-point conditional move instructions introduced
in mips4. */
#define ISA_HAS_FP_CONDMOVE ((ISA_MIPS4 \
@@ -851,7 +854,9 @@ struct mips_cpu_info {
/* ISA has the integer conditional move instructions introduced in mips4 and
ST Loongson 2E/2F. */
-#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE || TARGET_LOONGSON_2EF)
+#define ISA_HAS_CONDMOVE (ISA_HAS_FP_CONDMOVE \
+ || TARGET_MIPS5900 \
+ || TARGET_LOONGSON_2EF)
/* ISA has LDC1 and SDC1. */
#define ISA_HAS_LDC1_SDC1 (!ISA_MIPS1 && !TARGET_MIPS16)
@@ -964,6 +969,7 @@ struct mips_cpu_info {
/* ISA has data prefetch instructions. This controls use of 'pref'. */
#define ISA_HAS_PREFETCH ((ISA_MIPS4 \
|| TARGET_LOONGSON_2EF \
+ || TARGET_MIPS5900 \
|| ISA_MIPS32 \
|| ISA_MIPS32R2 \
|| ISA_MIPS64 \
@@ -1025,15 +1031,18 @@ struct mips_cpu_info {
and "addiu $4,$4,1". */
#define ISA_HAS_LOAD_DELAY (ISA_MIPS1 \
&& !TARGET_MIPS3900 \
+ && !TARGET_MIPS5900 \
&& !TARGET_MIPS16 \
&& !TARGET_MICROMIPS)
/* Likewise mtc1 and mfc1. */
#define ISA_HAS_XFER_DELAY (mips_isa <= 3 \
+ && !TARGET_MIPS5900 \
&& !TARGET_LOONGSON_2EF)
/* Likewise floating-point comparisons. */
#define ISA_HAS_FCMP_DELAY (mips_isa <= 3 \
+ && !TARGET_MIPS5900 \
&& !TARGET_LOONGSON_2EF)
/* True if mflo and mfhi can be immediately followed by instructions
@@ -1053,6 +1062,7 @@ struct mips_cpu_info {
|| ISA_MIPS64 \
|| ISA_MIPS64R2 \
|| TARGET_MIPS5500 \
+ || TARGET_MIPS5900 \
|| TARGET_LOONGSON_2EF)
/* ISA includes synci, jr.hb and jalr.hb. */
@@ -1070,7 +1080,7 @@ struct mips_cpu_info {
/* ISA includes ll and sc. Note that this implies ISA_HAS_SYNC
because the expanders use both ISA_HAS_SYNC and ISA_HAS_LL_SC
instructions. */
-#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS16)
+#define ISA_HAS_LL_SC (mips_isa >= 2 && !TARGET_MIPS5900 && !TARGET_MIPS16)
#define GENERATE_LL_SC \
(target_flags_explicit & MASK_LLSC \
? TARGET_LLSC && !TARGET_MIPS16 \
@@ -1143,6 +1153,7 @@ struct mips_cpu_info {
%{mdsp} %{mno-dsp} \
%{mdspr2} %{mno-dspr2} \
%{mmcu} %{mno-mcu} \
+%{meva} %{mno-eva} \
%{msmartmips} %{mno-smartmips} \
%{mmt} %{mno-mt} \
%{mfix-vr4120} %{mfix-vr4130} \
@@ -1361,8 +1372,8 @@ struct mips_cpu_info {
#define MAX_FIXED_MODE_SIZE LONG_DOUBLE_TYPE_SIZE
#ifdef IN_LIBGCC2
-#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
- || (defined _ABI64 && _MIPS_SIM == _ABI64)
+#if ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+ || (defined _ABI64 && _MIPS_SIM == _ABI64))
# define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 128
# else
# define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
@@ -2868,9 +2879,8 @@ while (0)
jal " USER_LABEL_PREFIX #FUNC "\n\
.set pop\n\
" TEXT_SECTION_ASM_OP);
-#endif /* Switch to #elif when we're no longer limited by K&R C. */
-#if (defined _ABIN32 && _MIPS_SIM == _ABIN32) \
- || (defined _ABI64 && _MIPS_SIM == _ABI64)
+#elif ((defined _ABIN32 && _MIPS_SIM == _ABIN32) \
+ || (defined _ABI64 && _MIPS_SIM == _ABI64))
#define CRT_CALL_STATIC_FUNCTION(SECTION_OP, FUNC) \
asm (SECTION_OP "\n\
.set push\n\
diff --git a/gcc/config/mips/mips.md b/gcc/config/mips/mips.md
index 7284e5f3384..b832dda27f0 100644
--- a/gcc/config/mips/mips.md
+++ b/gcc/config/mips/mips.md
@@ -55,6 +55,7 @@
r5000
r5400
r5500
+ r5900
r7000
r8000
r9000
@@ -406,8 +407,12 @@
;; Is this an extended instruction in mips16 mode?
(define_attr "extended_mips16" "no,yes"
- (if_then_else (ior (eq_attr "move_type" "sll0")
- (eq_attr "jal" "direct"))
+ (if_then_else (ior ;; In general, constant-pool loads are extended
+ ;; instructions. We don't yet optimize for 16-bit
+ ;; PC-relative references.
+ (eq_attr "move_type" "sll0,loadpool")
+ (eq_attr "jal" "direct")
+ (eq_attr "got" "load"))
(const_string "yes")
(const_string "no")))
@@ -420,14 +425,89 @@
(match_test "TARGET_MICROMIPS")))
(const_string "yes")
(const_string "no")))
-
-;; Length of instruction in bytes.
-(define_attr "length" ""
- (cond [(and (eq_attr "extended_mips16" "yes")
- (match_test "TARGET_MIPS16"))
- (const_int 4)
- (and (eq_attr "compression" "micromips,all")
+;; The number of individual instructions that a non-branch pattern generates,
+;; using units of BASE_INSN_LENGTH.
+(define_attr "insn_count" ""
+ (cond [;; "Ghost" instructions occupy no space.
+ (eq_attr "type" "ghost")
+ (const_int 0)
+
+ ;; Extended instructions count as 2.
+ (and (eq_attr "extended_mips16" "yes")
+ (match_test "TARGET_MIPS16"))
+ (const_int 2)
+
+ ;; A GOT load followed by an add of $gp. This is not used for MIPS16.
+ (eq_attr "got" "xgot_high")
+ (const_int 2)
+
+ ;; SHIFT_SHIFTs are decomposed into two separate instructions.
+ ;; They are extended instructions on MIPS16 targets.
+ (eq_attr "move_type" "shift_shift")
+ (if_then_else (match_test "TARGET_MIPS16")
+ (const_int 4)
+ (const_int 2))
+
+ ;; Check for doubleword moves that are decomposed into two
+ ;; instructions. The individual instructions are unextended
+ ;; MIPS16 ones.
+ (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
+ (eq_attr "dword_mode" "yes"))
+ (const_int 2)
+
+ ;; Constants, loads and stores are handled by external routines.
+ (and (eq_attr "move_type" "const,constN")
+ (eq_attr "dword_mode" "yes"))
+ (symbol_ref "mips_split_const_insns (operands[1])")
+ (eq_attr "move_type" "const,constN")
+ (symbol_ref "mips_const_insns (operands[1])")
+ (eq_attr "move_type" "load,fpload")
+ (symbol_ref "mips_load_store_insns (operands[1], insn)")
+ (eq_attr "move_type" "store,fpstore")
+ (symbol_ref "mips_load_store_insns (operands[0], insn)
+ + (TARGET_FIX_24K ? 1 : 0)")
+
+ ;; In the worst case, a call macro will take 8 instructions:
+ ;;
+ ;; lui $25,%call_hi(FOO)
+ ;; addu $25,$25,$28
+ ;; lw $25,%call_lo(FOO)($25)
+ ;; nop
+ ;; jalr $25
+ ;; nop
+ ;; lw $gp,X($sp)
+ ;; nop
+ (eq_attr "jal_macro" "yes")
+ (const_int 8)
+
+ ;; Various VR4120 errata require a nop to be inserted after a macc
+ ;; instruction. The assembler does this for us, so account for
+ ;; the worst-case length here.
+ (and (eq_attr "type" "imadd")
+ (match_test "TARGET_FIX_VR4120"))
+ (const_int 2)
+
+ ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
+ ;; the result of the second one is missed. The assembler should work
+ ;; around this by inserting a nop after the first dmult.
+ (and (eq_attr "type" "imul,imul3")
+ (eq_attr "mode" "DI")
+ (match_test "TARGET_FIX_VR4120"))
+ (const_int 2)
+
+ (eq_attr "type" "idiv,idiv3")
+ (symbol_ref "mips_idiv_insns ()")
+
+ (not (eq_attr "sync_mem" "none"))
+ (symbol_ref "mips_sync_loop_insns (insn, operands)")]
+ (const_int 1)))
+
+;; Length of instruction in bytes. The default is derived from "insn_count",
+;; but there are special cases for branches (which must be handled here)
+;; and for compressed single instructions.
+(define_attr "length" ""
+ (cond [(and (eq_attr "compression" "micromips,all")
(eq_attr "dword_mode" "no")
(match_test "TARGET_MICROMIPS"))
(const_int 2)
@@ -580,95 +660,8 @@
(const_int 20)
(match_test "Pmode == SImode")
(const_int 16)
- ] (const_int 24))
-
- ;; "Ghost" instructions occupy no space.
- (eq_attr "type" "ghost")
- (const_int 0)
-
- ;; GOT loads are extended MIPS16 instructions and 4-byte
- ;; microMIPS instructions.
- (eq_attr "got" "load")
- (const_int 4)
-
- ;; A GOT load followed by an add of $gp.
- (eq_attr "got" "xgot_high")
- (const_int 8)
-
- ;; In general, constant-pool loads are extended instructions.
- (eq_attr "move_type" "loadpool")
- (const_int 4)
-
- ;; SHIFT_SHIFTs are decomposed into two separate instructions.
- ;; They are extended instructions on MIPS16 targets.
- (eq_attr "move_type" "shift_shift")
- (const_int 8)
-
- ;; Check for doubleword moves that are decomposed into two
- ;; instructions. The individual instructions are unextended
- ;; MIPS16 ones or 2-byte microMIPS ones.
- (and (eq_attr "move_type" "mtc,mfc,mtlo,mflo,move")
- (eq_attr "dword_mode" "yes"))
- (if_then_else (match_test "TARGET_COMPRESSION")
- (const_int 4)
- (const_int 8))
-
- ;; Doubleword CONST{,N} moves are split into two word
- ;; CONST{,N} moves.
- (and (eq_attr "move_type" "const,constN")
- (eq_attr "dword_mode" "yes"))
- (symbol_ref "mips_split_const_insns (operands[1]) * BASE_INSN_LENGTH")
-
- ;; Otherwise, constants, loads and stores are handled by external
- ;; routines.
- (eq_attr "move_type" "const,constN")
- (symbol_ref "mips_const_insns (operands[1]) * BASE_INSN_LENGTH")
- (eq_attr "move_type" "load,fpload")
- (symbol_ref "mips_load_store_insns (operands[1], insn)
- * BASE_INSN_LENGTH")
- (eq_attr "move_type" "store,fpstore")
- (symbol_ref "mips_load_store_insns (operands[0], insn)
- * BASE_INSN_LENGTH
- + (TARGET_FIX_24K ? NOP_INSN_LENGTH : 0)")
-
- ;; In the worst case, a call macro will take 8 instructions:
- ;;
- ;; lui $25,%call_hi(FOO)
- ;; addu $25,$25,$28
- ;; lw $25,%call_lo(FOO)($25)
- ;; nop
- ;; jalr $25
- ;; nop
- ;; lw $gp,X($sp)
- ;; nop
- (eq_attr "jal_macro" "yes")
- (const_int 32)
-
- ;; Various VR4120 errata require a nop to be inserted after a macc
- ;; instruction. The assembler does this for us, so account for
- ;; the worst-case length here.
- (and (eq_attr "type" "imadd")
- (match_test "TARGET_FIX_VR4120"))
- (const_int 8)
-
- ;; VR4120 errata MD(4): if there are consecutive dmult instructions,
- ;; the result of the second one is missed. The assembler should work
- ;; around this by inserting a nop after the first dmult.
- (and (eq_attr "type" "imul,imul3")
- (and (eq_attr "mode" "DI")
- (match_test "TARGET_FIX_VR4120")))
- (const_int 8)
-
- (eq_attr "type" "idiv,idiv3")
- (symbol_ref "mips_idiv_insns () * BASE_INSN_LENGTH")
-
- (not (eq_attr "sync_mem" "none"))
- (symbol_ref "mips_sync_loop_insns (insn, operands)
- * BASE_INSN_LENGTH")
-
- (match_test "TARGET_MIPS16")
- (const_int 2)
- ] (const_int 4)))
+ ] (const_int 24))]
+ (symbol_ref "get_attr_insn_count (insn) * BASE_INSN_LENGTH")))
;; Attribute describing the processor.
(define_enum_attr "cpu" "processor"
@@ -701,16 +694,11 @@
(const_string "hilo")]
(const_string "none")))
-;; Is it a single instruction?
-(define_attr "single_insn" "no,yes"
- (symbol_ref "(get_attr_length (insn) == (TARGET_MIPS16 ? 2 : 4)
- ? SINGLE_INSN_YES : SINGLE_INSN_NO)"))
-
;; Can the instruction be put into a delay slot?
(define_attr "can_delay" "no,yes"
(if_then_else (and (eq_attr "type" "!branch,call,jump")
- (and (eq_attr "hazard" "none")
- (eq_attr "single_insn" "yes")))
+ (eq_attr "hazard" "none")
+ (match_test "get_attr_insn_count (insn) == 1"))
(const_string "yes")
(const_string "no")))
@@ -755,7 +743,9 @@
;; This mode iterator allows :MOVECC to be used anywhere that a
;; conditional-move-type condition is needed.
(define_mode_iterator MOVECC [SI (DI "TARGET_64BIT")
- (CC "TARGET_HARD_FLOAT && !TARGET_LOONGSON_2EF")])
+ (CC "TARGET_HARD_FLOAT
+ && !TARGET_LOONGSON_2EF
+ && !TARGET_MIPS5900")])
;; 32-bit integer moves for which we provide move patterns.
(define_mode_iterator IMOVE32
@@ -1417,7 +1407,7 @@
"mul.<fmt>\t%0,%1,%2\;nop"
[(set_attr "type" "fmul")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_insn "mulv2sf3"
[(set (match_operand:V2SF 0 "register_operand" "=f")
@@ -1478,7 +1468,7 @@
[(set (match_operand:GPR 0 "register_operand")
(mult:GPR (match_operand:GPR 1 "register_operand")
(match_operand:GPR 2 "register_operand")))]
- ""
+ "ISA_HAS_<D>MULT"
{
rtx lo;
@@ -1524,7 +1514,7 @@
{
if (which_alternative == 1)
return "<d>mult\t%1,%2";
- if (<MODE>mode == SImode && TARGET_MIPS3900)
+ if (<MODE>mode == SImode && (TARGET_MIPS3900 || TARGET_MIPS5900))
return "mult\t%0,%1,%2";
return "<d>mul\t%0,%1,%2";
}
@@ -1558,7 +1548,7 @@
[(set (match_operand:GPR 0 "muldiv_target_operand" "=l")
(mult:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))]
- "!TARGET_FIX_R4000"
+ "ISA_HAS_<D>MULT && !TARGET_FIX_R4000"
"<d>mult\t%1,%2"
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")])
@@ -1568,11 +1558,11 @@
(mult:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))
(clobber (match_scratch:GPR 3 "=l"))]
- "TARGET_FIX_R4000"
+ "ISA_HAS_<D>MULT && TARGET_FIX_R4000"
"<d>mult\t%1,%2\;mflo\t%0"
[(set_attr "type" "imul")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
;; On the VR4120 and VR4130, it is better to use "mtlo $0; macc" instead
;; of "mult; mflo". They have the same latency, but the first form gives
@@ -1632,7 +1622,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "3")
(set_attr "mode" "SI")
- (set_attr "length" "4,8")])
+ (set_attr "insn_count" "1,2")])
;; The same idea applies here. The middle alternative needs one less
;; clobber than the final alternative, so we add "*?" as a counterweight.
@@ -1651,7 +1641,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "3")
(set_attr "mode" "SI")
- (set_attr "length" "4,4,8")])
+ (set_attr "insn_count" "1,1,2")])
;; Split *mul_acc_si if both the source and destination accumulator
;; values are GPRs.
@@ -1732,7 +1722,7 @@
""
[(set_attr "type" "imadd")
(set_attr "accum_in" "1")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
;; Patterns generated by the define_peephole2 below.
@@ -1868,7 +1858,7 @@
[(set_attr "type" "imadd")
(set_attr "accum_in" "1")
(set_attr "mode" "SI")
- (set_attr "length" "4,8")])
+ (set_attr "insn_count" "1,2")])
;; Split *mul_sub_si if both the source and destination accumulator
;; values are GPRs.
@@ -1949,7 +1939,7 @@
"mult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set_attr "length" "12")])
+ (set_attr "insn_count" "3")])
(define_insn_and_split "<u>mulsidi3_64bit"
[(set (match_operand:DI 0 "register_operand" "=d")
@@ -1968,10 +1958,10 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "ISA_HAS_EXT_INS")
- (const_int 16)
- (const_int 28)))])
+ (const_int 4)
+ (const_int 7)))])
(define_expand "<u>mulsidi3_64bit_mips16"
[(set (match_operand:DI 0 "register_operand")
@@ -2035,7 +2025,7 @@
(mult:DI (sign_extend:DI (match_operand:SI 1 "register_operand" "d"))
(sign_extend:DI (match_operand:SI 2 "register_operand" "d"))))
(clobber (match_scratch:DI 3 "=l"))]
- "TARGET_64BIT && ISA_HAS_DMUL3"
+ "ISA_HAS_DMUL3"
"dmul\t%0,%1,%2"
[(set_attr "type" "imul3")
(set_attr "mode" "DI")])
@@ -2122,7 +2112,7 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "SI")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_expand "<su>mulsi3_highpart_split"
[(set (match_operand:SI 0 "register_operand")
@@ -2189,7 +2179,7 @@
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
(any_extend:TI (match_operand:DI 2 "register_operand")))
(const_int 64))))]
- "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{
if (TARGET_MIPS16)
emit_insn (gen_<su>muldi3_highpart_split (operands[0], operands[1],
@@ -2208,7 +2198,7 @@
(any_extend:TI (match_operand:DI 2 "register_operand" "d")))
(const_int 64))))
(clobber (match_scratch:DI 3 "=l"))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& !TARGET_MIPS16
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{ return TARGET_FIX_R4000 ? "dmult<u>\t%1,%2\n\tmfhi\t%0" : "#"; }
@@ -2221,7 +2211,7 @@
}
[(set_attr "type" "imul")
(set_attr "mode" "DI")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_expand "<su>muldi3_highpart_split"
[(set (match_operand:DI 0 "register_operand")
@@ -2244,7 +2234,7 @@
[(set (match_operand:TI 0 "register_operand")
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand"))
(any_extend:TI (match_operand:DI 2 "register_operand"))))]
- "TARGET_64BIT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
+ "ISA_HAS_DMULT && !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
{
rtx hilo;
@@ -2266,7 +2256,7 @@
[(set (match_operand:TI 0 "muldiv_target_operand" "=x")
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
(any_extend:TI (match_operand:DI 2 "register_operand" "d"))))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& !TARGET_FIX_R4000
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
"dmult<u>\t%1,%2"
@@ -2278,13 +2268,13 @@
(mult:TI (any_extend:TI (match_operand:DI 1 "register_operand" "d"))
(any_extend:TI (match_operand:DI 2 "register_operand" "d"))))
(clobber (match_scratch:TI 3 "=x"))]
- "TARGET_64BIT
+ "ISA_HAS_DMULT
&& TARGET_FIX_R4000
&& !(<CODE> == ZERO_EXTEND && TARGET_FIX_VR4120)"
"dmult<u>\t%1,%2\;mflo\t%L0\;mfhi\t%M0"
[(set_attr "type" "imul")
(set_attr "mode" "DI")
- (set_attr "length" "12")])
+ (set_attr "insn_count" "3")])
;; The R4650 supports a 32-bit multiply/ 64-bit accumulate
;; instruction. The HI/LO registers are used as a 64-bit accumulator.
@@ -2535,10 +2525,10 @@
}
[(set_attr "type" "fdiv")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*recip<mode>3"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2553,92 +2543,64 @@
}
[(set_attr "type" "frdiv")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
;; VR4120 errata MD(A1): signed division instructions do not work correctly
;; with negative operands. We use special libgcc functions instead.
-(define_expand "divmod<mode>4"
- [(set (match_operand:GPR 0 "register_operand")
- (div:GPR (match_operand:GPR 1 "register_operand")
- (match_operand:GPR 2 "register_operand")))
- (set (match_operand:GPR 3 "register_operand")
- (mod:GPR (match_dup 1)
- (match_dup 2)))]
- "!TARGET_FIX_VR4120"
-{
- if (TARGET_MIPS16)
- {
- emit_insn (gen_divmod<mode>4_split (operands[3], operands[1],
- operands[2]));
- emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
- }
- else
- emit_insn (gen_divmod<mode>4_internal (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
-})
-
-(define_insn_and_split "divmod<mode>4_internal"
- [(set (match_operand:GPR 0 "muldiv_target_operand" "=l")
+;;
+;; Expand generates divmod instructions for individual division and modulus
+;; operations. We then rely on CSE to reuse earlier divmods where possible.
+;; This means that, when generating MIPS16 code, it is better not to expose
+;; the fixed LO register until after CSE has finished. However, it's still
+;; better to split before register allocation, so that we don't allocate
+;; one of the scarce MIPS16 registers to an unused result.
+(define_insn_and_split "divmod<mode>4"
+ [(set (match_operand:GPR 0 "register_operand" "=kl")
(div:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))
(set (match_operand:GPR 3 "register_operand" "=d")
(mod:GPR (match_dup 1)
(match_dup 2)))]
- "!TARGET_FIX_VR4120 && !TARGET_MIPS16"
+ "ISA_HAS_<D>DIV && !TARGET_FIX_VR4120"
"#"
- "&& reload_completed"
+ "&& ((TARGET_MIPS16 && cse_not_expected) || reload_completed)"
[(const_int 0)]
{
emit_insn (gen_divmod<mode>4_split (operands[3], operands[1], operands[2]));
+ if (TARGET_MIPS16)
+ emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
DONE;
}
[(set_attr "type" "idiv")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ ;; Worst case for MIPS16.
+ (set_attr "insn_count" "3")])
-(define_expand "udivmod<mode>4"
- [(set (match_operand:GPR 0 "register_operand")
- (udiv:GPR (match_operand:GPR 1 "register_operand")
- (match_operand:GPR 2 "register_operand")))
- (set (match_operand:GPR 3 "register_operand")
- (umod:GPR (match_dup 1)
- (match_dup 2)))]
- ""
-{
- if (TARGET_MIPS16)
- {
- emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1],
- operands[2]));
- emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
- }
- else
- emit_insn (gen_udivmod<mode>4_internal (operands[0], operands[1],
- operands[2], operands[3]));
- DONE;
-})
-
-(define_insn_and_split "udivmod<mode>4_internal"
- [(set (match_operand:GPR 0 "muldiv_target_operand" "=l")
+;; See the comment above "divmod<mode>4" for the MIPS16 handling.
+(define_insn_and_split "udivmod<mode>4"
+ [(set (match_operand:GPR 0 "register_operand" "=kl")
(udiv:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d")))
(set (match_operand:GPR 3 "register_operand" "=d")
(umod:GPR (match_dup 1)
(match_dup 2)))]
- "!TARGET_MIPS16"
+ "ISA_HAS_<D>DIV"
"#"
- "reload_completed"
+ "(TARGET_MIPS16 && cse_not_expected) || reload_completed"
[(const_int 0)]
{
emit_insn (gen_udivmod<mode>4_split (operands[3], operands[1], operands[2]));
+ if (TARGET_MIPS16)
+ emit_move_insn (operands[0], gen_rtx_REG (<MODE>mode, LO_REGNUM));
DONE;
}
- [(set_attr "type" "idiv")
- (set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ [(set_attr "type" "idiv")
+ (set_attr "mode" "<MODE>")
+ ;; Worst case for MIPS16.
+ (set_attr "insn_count" "3")])
(define_expand "<u>divmod<mode>4_split"
[(set (match_operand:GPR 0 "register_operand")
@@ -2671,7 +2633,7 @@
[(any_div:GPR (match_operand:GPR 1 "register_operand" "d")
(match_operand:GPR 2 "register_operand" "d"))]
UNSPEC_SET_HILO))]
- ""
+ "ISA_HAS_<GPR:D>DIV"
{ return mips_output_division ("<GPR:d>div<u>\t%.,%1,%2", operands); }
[(set_attr "type" "idiv")
(set_attr "mode" "<GPR:MODE>")])
@@ -2698,10 +2660,10 @@
}
[(set_attr "type" "fsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*rsqrt<mode>a"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2716,10 +2678,10 @@
}
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
(define_insn "*rsqrt<mode>b"
[(set (match_operand:ANYF 0 "register_operand" "=f")
@@ -2734,10 +2696,10 @@
}
[(set_attr "type" "frsqrt")
(set_attr "mode" "<UNITMODE>")
- (set (attr "length")
+ (set (attr "insn_count")
(if_then_else (match_test "TARGET_FIX_SB1")
- (const_int 8)
- (const_int 4)))])
+ (const_int 2)
+ (const_int 1)))])
;;
;; ....................
@@ -3530,7 +3492,7 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "DF")
(set_attr "cnv_mode" "D2I")
- (set_attr "length" "36")])
+ (set_attr "insn_count" "9")])
(define_expand "fix_truncsfsi2"
[(set (match_operand:SI 0 "register_operand")
@@ -3567,7 +3529,7 @@
[(set_attr "type" "fcvt")
(set_attr "mode" "SF")
(set_attr "cnv_mode" "S2I")
- (set_attr "length" "36")])
+ (set_attr "insn_count" "9")])
(define_insn "fix_truncdfdi2"
@@ -4045,7 +4007,7 @@
operands[2] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
operands[3] = mips_unspec_address (operands[1], SYMBOL_64_MID);
}
- [(set_attr "length" "20")])
+ [(set_attr "insn_count" "5")])
;; Use a scratch register to reduce the latency of the above pattern
;; on superscalar machines. The optimized sequence is:
@@ -4100,7 +4062,7 @@
operands[3] = mips_unspec_address (operands[1], SYMBOL_64_HIGH);
operands[4] = mips_unspec_address (operands[1], SYMBOL_64_LOW);
}
- [(set_attr "length" "24")])
+ [(set_attr "insn_count" "6")])
;; Split HIGHs into:
;;
@@ -5110,7 +5072,7 @@
return ".cprestore\t%1";
}
[(set_attr "type" "store")
- (set_attr "length" "4,12")])
+ (set_attr "insn_count" "1,3")])
(define_insn "use_cprestore_<mode>"
[(set (reg:P CPRESTORE_SLOT_REGNUM)
@@ -5171,7 +5133,7 @@
"\tjr.hb\t$31\n"
"\tnop%>%)";
}
- [(set_attr "length" "20")])
+ [(set_attr "insn_count" "5")])
;; Cache operations for R4000-style caches.
(define_insn "mips_cache"
@@ -5364,8 +5326,7 @@
;; not have and immediate). We recognize a shift of a load in order
;; to make it simple enough for combine to understand.
;;
-;; The length here is the worst case: the length of the split version
-;; will be more accurate.
+;; The instruction count here is the worst case.
(define_insn_and_split ""
[(set (match_operand:SI 0 "register_operand" "=d")
(lshiftrt:SI (match_operand:SI 1 "memory_operand" "m")
@@ -5378,7 +5339,8 @@
""
[(set_attr "type" "load")
(set_attr "mode" "SI")
- (set_attr "length" "8")])
+ (set (attr "insn_count")
+ (symbol_ref "mips_load_store_insns (operands[1], insn) + 2"))])
(define_insn "rotr<mode>3"
[(set (match_operand:GPR 0 "register_operand" "=d")
@@ -5986,7 +5948,7 @@
(clobber (reg:SI MIPS16_T_REGNUM))]
"TARGET_MIPS16_SHORT_JUMP_TABLES"
{
- rtx diff_vec = PATTERN (next_real_insn (operands[2]));
+ rtx diff_vec = PATTERN (NEXT_INSN (operands[2]));
gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
@@ -6017,7 +5979,7 @@
return "j\t%4";
}
- [(set_attr "length" "32")])
+ [(set_attr "insn_count" "16")])
;; For TARGET_USE_GOT, we save the gp in the jmp_buf as well.
;; While it is possible to either pull it off the stack (in the
@@ -6908,11 +6870,8 @@
(set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
""
[(set_attr "type" "unknown")
- ; Since rdhwr always generates a trap for now, putting it in a delay
- ; slot would make the kernel's emulation of it much slower.
- (set_attr "can_delay" "no")
(set_attr "mode" "<MODE>")
- (set_attr "length" "8")])
+ (set_attr "insn_count" "2")])
(define_insn "*tls_get_tp_<mode>_split"
[(set (reg:P TLS_GET_TP_REGNUM)
@@ -6920,7 +6879,8 @@
"HAVE_AS_TLS && !TARGET_MIPS16"
".set\tpush\;.set\tmips32r2\t\;rdhwr\t$3,$29\;.set\tpop"
[(set_attr "type" "unknown")
- ; See tls_get_tp_<mode>
+ ; Since rdhwr always generates a trap for now, putting it in a delay
+ ; slot would make the kernel's emulation of it much slower.
(set_attr "can_delay" "no")
(set_attr "mode" "<MODE>")])
@@ -6952,7 +6912,7 @@
(set (match_dup 0) (reg:P TLS_GET_TP_REGNUM))]
""
[(set_attr "type" "multi")
- (set_attr "length" "8")
+ (set_attr "insn_count" "4")
(set_attr "mode" "<MODE>")])
(define_insn "*tls_get_tp_mips16_call_<mode>"
@@ -6964,7 +6924,7 @@
"HAVE_AS_TLS && TARGET_MIPS16"
{ return MIPS_CALL ("jal", operands, 0, -1); }
[(set_attr "type" "call")
- (set_attr "length" "6")
+ (set_attr "insn_count" "3")
(set_attr "mode" "<MODE>")])
;; Named pattern for expanding thread pointer reference.
diff --git a/gcc/config/mips/mips.opt b/gcc/config/mips/mips.opt
index e11710db3c0..08ab29b1810 100644
--- a/gcc/config/mips/mips.opt
+++ b/gcc/config/mips/mips.opt
@@ -141,6 +141,10 @@ membedded-data
Target Report Var(TARGET_EMBEDDED_DATA)
Use ROM instead of RAM
+meva
+Target Report Var(TARGET_EVA)
+Use Enhanced Virtual Addressing instructions
+
mexplicit-relocs
Target Report Mask(EXPLICIT_RELOCS)
Use NewABI-style %reloc() assembly operators
diff --git a/gcc/config/mips/mti-linux.h b/gcc/config/mips/mti-linux.h
index a3fb48976bd..45bc0b88107 100644
--- a/gcc/config/mips/mti-linux.h
+++ b/gcc/config/mips/mti-linux.h
@@ -20,7 +20,7 @@ along with GCC; see the file COPYING3. If not see
/* This target is a multilib target, specify the sysroot paths. */
#undef SYSROOT_SUFFIX_SPEC
#define SYSROOT_SUFFIX_SPEC \
- "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}"
+ "%{mips32:/mips32}%{mips64:/mips64}%{mips64r2:/mips64r2}%{mips16:/mips16}%{mmicromips:/micromips}%{mabi=64:/64}%{mel|EL:/el}%{msoft-float:/sof}"
#undef DRIVER_SELF_SPECS
#define DRIVER_SELF_SPECS \
diff --git a/gcc/config/mips/n32-elf.h b/gcc/config/mips/n32-elf.h
new file mode 100644
index 00000000000..0f41a6e9fc7
--- /dev/null
+++ b/gcc/config/mips/n32-elf.h
@@ -0,0 +1,35 @@
+/* Definitions of target machine for GNU compiler.
+ n32 for embedded systems.
+ Copyright (C) 2003-2013 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+/* Use standard ELF-style local labels (not '$' as on early Irix). */
+#undef LOCAL_LABEL_PREFIX
+#define LOCAL_LABEL_PREFIX "."
+
+/* Use periods rather than dollar signs in special g++ assembler names. */
+#define NO_DOLLAR_IN_LABEL
+
+/* Force n32 to use 64-bit long doubles. */
+#undef LONG_DOUBLE_TYPE_SIZE
+#define LONG_DOUBLE_TYPE_SIZE 64
+
+#ifdef IN_LIBGCC2
+#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
+#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
+#endif
diff --git a/gcc/config/mips/sde.h b/gcc/config/mips/sde.h
index d42fee6309f..d35f79f25be 100644
--- a/gcc/config/mips/sde.h
+++ b/gcc/config/mips/sde.h
@@ -89,23 +89,6 @@ along with GCC; see the file COPYING3. If not see
#undef PTRDIFF_TYPE
#define PTRDIFF_TYPE "long int"
-/* Use standard ELF-style local labels (not '$' as on early Irix). */
-#undef LOCAL_LABEL_PREFIX
-#define LOCAL_LABEL_PREFIX "."
-
-/* Use periods rather than dollar signs in special g++ assembler names. */
-#define NO_DOLLAR_IN_LABEL
-
-/* Currently we don't support 128bit long doubles, so for now we force
- n32 to be 64bit. */
-#undef LONG_DOUBLE_TYPE_SIZE
-#define LONG_DOUBLE_TYPE_SIZE 64
-
-#ifdef IN_LIBGCC2
-#undef LIBGCC2_LONG_DOUBLE_TYPE_SIZE
-#define LIBGCC2_LONG_DOUBLE_TYPE_SIZE 64
-#endif
-
/* Force all .init and .fini entries to be 32-bit, not mips16, so that
in a mixed environment they are all the same mode. The crti.asm and
crtn.asm files will also be compiled as 32-bit due to the
diff --git a/gcc/config/mips/t-mti-elf b/gcc/config/mips/t-mti-elf
index 3f0868fb856..bce8f063452 100644
--- a/gcc/config/mips/t-mti-elf
+++ b/gcc/config/mips/t-mti-elf
@@ -16,20 +16,29 @@
# along with GCC; see the file COPYING3. If not see
# <http://www.gnu.org/licenses/>.
-MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mabi=64 EL msoft-float
-MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 64 el sof
-MULTILIB_MATCHES = EL=mel EB=meb
+# The default build is mips32r2, hard-float big-endian. Add mips32,
+# soft-float, and little-endian variations.
-# We do not want to build mips16 versions of mips64* architectures.
-MULTILIB_EXCEPTIONS += *mips64*/*mips16*
+MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float
+MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof
+MULTILIB_MATCHES = EL=mel EB=meb
-# 64 bit ABI is not supported on mips32 architecture.
+# The 64 bit ABI is not supported on the mips32 architecture.
MULTILIB_EXCEPTIONS += *mips32*/*mabi=64*
-# The 64 bit ABI is not supported on the mips32r2 bit architecture.
-# Because mips32r2 is the default the exception list is a little messy.
-# Basically we are saying any list that doesn't specify mips32, mips64,
-# or mips64r2 but does specify mabi=64 is not allowed because that
-# would be defaulting to the mips32r2 architecture.
+# The 64 bit ABI is not supported on the mips32r2 architecture.
+# Because mips32r2 is the default, we can't use that flag to trigger
+# the exception, so we check for mabi=64 with no specific mips
+# architecture flag instead.
MULTILIB_EXCEPTIONS += mabi=64*
-MULTILIB_EXCEPTIONS += mips16/mabi=64*
+
+# We do not want to build mips16 versions of mips64* architectures.
+MULTILIB_EXCEPTIONS += *mips64*/*mips16*
+MULTILIB_EXCEPTIONS += *mips16/mabi=64*
+
+# We only want micromips for the mips32r2 architecture, and we do not want
+# it used in conjunction with -mips16.
+MULTILIB_EXCEPTIONS += *mips16/mmicromips*
+MULTILIB_EXCEPTIONS += *mips64*/mmicromips*
+MULTILIB_EXCEPTIONS += *mips32/mmicromips*
+MULTILIB_EXCEPTIONS += *mmicromips/mabi=64*
diff --git a/gcc/config/mips/t-mti-linux b/gcc/config/mips/t-mti-linux
index 775a68d9dae..bce8f063452 100644
--- a/gcc/config/mips/t-mti-linux
+++ b/gcc/config/mips/t-mti-linux
@@ -19,8 +19,8 @@
# The default build is mips32r2, hard-float big-endian. Add mips32,
# soft-float, and little-endian variations.
-MULTILIB_OPTIONS = mips32/mips64/mips64r2 mabi=64 EL msoft-float
-MULTILIB_DIRNAMES = mips32 mips64 mips64r2 64 el sof
+MULTILIB_OPTIONS = mips32/mips64/mips64r2 mips16 mmicromips mabi=64 EL msoft-float
+MULTILIB_DIRNAMES = mips32 mips64 mips64r2 mips16 micromips 64 el sof
MULTILIB_MATCHES = EL=mel EB=meb
# The 64 bit ABI is not supported on the mips32 architecture.
@@ -28,6 +28,17 @@ MULTILIB_EXCEPTIONS += *mips32*/*mabi=64*
# The 64 bit ABI is not supported on the mips32r2 architecture.
# Because mips32r2 is the default we can't use that flag to trigger
-# the exception so we check for mabi=64 with no specific mips flag
-# instead.
+# the exception, so we check for mabi=64 with no specific mips
+# architecture flag instead.
MULTILIB_EXCEPTIONS += mabi=64*
+
+# We do not want to build mips16 versions of mips64* architectures.
+MULTILIB_EXCEPTIONS += *mips64*/*mips16*
+MULTILIB_EXCEPTIONS += *mips16/mabi=64*
+
+# We only want micromips for the mips32r2 architecture, and we do not want
+# it used in conjunction with -mips16.
+MULTILIB_EXCEPTIONS += *mips16/mmicromips*
+MULTILIB_EXCEPTIONS += *mips64*/mmicromips*
+MULTILIB_EXCEPTIONS += *mips32/mmicromips*
+MULTILIB_EXCEPTIONS += *mmicromips/mabi=64*
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
index 1af09e559b0..bd37067dfc4 100644
--- a/gcc/config/mmix/mmix.c
+++ b/gcc/config/mmix/mmix.c
@@ -313,7 +313,7 @@ mmix_init_machine_status (void)
return ggc_alloc_cleared_machine_function ();
}
-/* DATA_ALIGNMENT.
+/* DATA_ABI_ALIGNMENT.
We have trouble getting the address of stuff that is located at other
than 32-bit alignments (GETA requirements), so try to give everything
at least 32-bit alignment. */
diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h
index 4ca1a2b8c86..c5edc5777a9 100644
--- a/gcc/config/mmix/mmix.h
+++ b/gcc/config/mmix/mmix.h
@@ -164,7 +164,7 @@ struct GTY(()) machine_function
/* Copied from elfos.h. */
#define MAX_OFILE_ALIGNMENT (32768 * 8)
-#define DATA_ALIGNMENT(TYPE, BASIC_ALIGN) \
+#define DATA_ABI_ALIGNMENT(TYPE, BASIC_ALIGN) \
mmix_data_alignment (TYPE, BASIC_ALIGN)
#define CONSTANT_ALIGNMENT(CONSTANT, BASIC_ALIGN) \
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 2e18bebf3d8..c2ed7389bc4 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -647,6 +647,15 @@ rl78_addr_space_pointer_mode (addr_space_t addrspace)
}
}
+/* Returns TRUE for valid pointer modes. */
+#undef TARGET_VALID_POINTER_MODE
+#define TARGET_VALID_POINTER_MODE rl78_valid_pointer_mode
+static bool
+rl78_valid_pointer_mode (enum machine_mode m)
+{
+ return (m == HImode || m == SImode);
+}
+
/* Return the appropriate mode for a named address address. */
#undef TARGET_ADDR_SPACE_ADDRESS_MODE
#define TARGET_ADDR_SPACE_ADDRESS_MODE rl78_addr_space_address_mode
@@ -2730,6 +2739,16 @@ rl78_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
}
+
+#undef TARGET_UNWIND_WORD_MODE
+#define TARGET_UNWIND_WORD_MODE rl78_unwind_word_mode
+
+static enum machine_mode
+rl78_unwind_word_mode (void)
+{
+ return HImode;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rl78.h"
diff --git a/gcc/config/rl78/rl78.md b/gcc/config/rl78/rl78.md
index b3cfe6d1bbc..efc26210498 100644
--- a/gcc/config/rl78/rl78.md
+++ b/gcc/config/rl78/rl78.md
@@ -235,6 +235,24 @@
[(set_attr "valloc" "macax")]
)
+(define_expand "mulqi3"
+ [(set (match_operand:QI 0 "register_operand" "")
+ (mult:QI (match_operand:QI 1 "general_operand" "")
+ (match_operand:QI 2 "nonmemory_operand" "")))
+ ]
+ "" ; mulu supported by all targets
+ ""
+)
+
+(define_expand "mulhi3"
+ [(set (match_operand:HI 0 "register_operand" "")
+ (mult:HI (match_operand:HI 1 "general_operand" "")
+ (match_operand:HI 2 "nonmemory_operand" "")))
+ ]
+ "! RL78_MUL_NONE"
+ ""
+)
+
(define_expand "mulsi3"
[(set (match_operand:SI 0 "register_operand" "=&v")
(mult:SI (match_operand:SI 1 "nonmemory_operand" "vi")
@@ -244,6 +262,58 @@
""
)
+(define_insn "*mulqi3_rl78"
+ [(set (match_operand:QI 0 "register_operand" "=&v")
+ (mult:QI (match_operand:QI 1 "general_operand" "+viU")
+ (match_operand:QI 2 "general_operand" "vi")))
+ ]
+ "" ; mulu supported by all targets
+ "; mulqi macro %0 = %1 * %2
+ mov a, %h1
+ mov x, a
+ mov a, %h2
+ mulu x ; ax = a * x
+ mov a, x
+ mov %h0, a
+ ; end of mulqi macro"
+;; [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_rl78"
+ [(set (match_operand:HI 0 "register_operand" "=&v")
+ (mult:HI (match_operand:HI 1 "general_operand" "+viU")
+ (match_operand:HI 2 "general_operand" "vi")))
+ ]
+ "RL78_MUL_RL78"
+ "; mulhi macro %0 = %1 * %2
+ movw ax, %h1
+ movw bc, %h2
+ mulhu ; bcax = bc * ax
+ movw %h0, ax
+ ; end of mulhi macro"
+;; [(set_attr "valloc" "macax")]
+)
+
+(define_insn "*mulhi3_g13"
+ [(set (match_operand:HI 0 "register_operand" "=&v")
+ (mult:HI (match_operand:HI 1 "general_operand" "+viU")
+ (match_operand:HI 2 "general_operand" "vi")))
+ ]
+ "RL78_MUL_G13"
+ "; mulhi macro %0 = %1 * %2
+ mov a, #0x00
+ mov !0xf00e8, a ; MDUC
+ movw ax, %h1
+ movw 0xffff0, ax ; MDAL
+ movw ax, %h2
+ movw 0xffff2, ax ; MDAH
+ nop ; mdb = mdal * mdah
+ movw ax, 0xffff6 ; MDBL
+ movw %h0, ax
+ ; end of mulhi macro"
+;; [(set_attr "valloc" "umul")]
+)
+
;; 0xFFFF0 is MACR(L). 0xFFFF2 is MACR(H) but we don't care about it
;; because we're only using the lower 16 bits (which is the upper 16
;; bits of the result).
diff --git a/gcc/config/rs6000/altivec.h b/gcc/config/rs6000/altivec.h
index fd6d07f50ff..4b91c5c5e24 100644
--- a/gcc/config/rs6000/altivec.h
+++ b/gcc/config/rs6000/altivec.h
@@ -321,6 +321,42 @@
#define vec_vsx_st __builtin_vec_vsx_st
#endif
+#ifdef _ARCH_PWR8
+/* Vector additions added in ISA 2.07. */
+#define vec_eqv __builtin_vec_eqv
+#define vec_nand __builtin_vec_nand
+#define vec_orc __builtin_vec_orc
+#define vec_vaddudm __builtin_vec_vaddudm
+#define vec_vclz __builtin_vec_vclz
+#define vec_vclzb __builtin_vec_vclzb
+#define vec_vclzd __builtin_vec_vclzd
+#define vec_vclzh __builtin_vec_vclzh
+#define vec_vclzw __builtin_vec_vclzw
+#define vec_vgbbd __builtin_vec_vgbbd
+#define vec_vmaxsd __builtin_vec_vmaxsd
+#define vec_vmaxud __builtin_vec_vmaxud
+#define vec_vminsd __builtin_vec_vminsd
+#define vec_vminud __builtin_vec_vminud
+#define vec_vmrgew __builtin_vec_vmrgew
+#define vec_vmrgow __builtin_vec_vmrgow
+#define vec_vpksdss __builtin_vec_vpksdss
+#define vec_vpksdus __builtin_vec_vpksdus
+#define vec_vpkudum __builtin_vec_vpkudum
+#define vec_vpkudus __builtin_vec_vpkudus
+#define vec_vpopcnt __builtin_vec_vpopcnt
+#define vec_vpopcntb __builtin_vec_vpopcntb
+#define vec_vpopcntd __builtin_vec_vpopcntd
+#define vec_vpopcnth __builtin_vec_vpopcnth
+#define vec_vpopcntw __builtin_vec_vpopcntw
+#define vec_vrld __builtin_vec_vrld
+#define vec_vsld __builtin_vec_vsld
+#define vec_vsrad __builtin_vec_vsrad
+#define vec_vsrd __builtin_vec_vsrd
+#define vec_vsubudm __builtin_vec_vsubudm
+#define vec_vupkhsw __builtin_vec_vupkhsw
+#define vec_vupklsw __builtin_vec_vupklsw
+#endif
+
/* Predicates.
For C++, we use templates in order to allow non-parenthesized arguments.
For C, instead, we use macros since non-parenthesized arguments were
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md
index 1b0b5c3fb13..6607e450be3 100644
--- a/gcc/config/rs6000/altivec.md
+++ b/gcc/config/rs6000/altivec.md
@@ -41,15 +41,11 @@
UNSPEC_VMULOSB
UNSPEC_VMULOUH
UNSPEC_VMULOSH
- UNSPEC_VPKUHUM
- UNSPEC_VPKUWUM
UNSPEC_VPKPX
- UNSPEC_VPKSHSS
- UNSPEC_VPKSWSS
- UNSPEC_VPKUHUS
- UNSPEC_VPKSHUS
- UNSPEC_VPKUWUS
- UNSPEC_VPKSWUS
+ UNSPEC_VPACK_SIGN_SIGN_SAT
+ UNSPEC_VPACK_SIGN_UNS_SAT
+ UNSPEC_VPACK_UNS_UNS_SAT
+ UNSPEC_VPACK_UNS_UNS_MOD
UNSPEC_VSLV4SI
UNSPEC_VSLO
UNSPEC_VSR
@@ -71,12 +67,10 @@
UNSPEC_VLOGEFP
UNSPEC_VEXPTEFP
UNSPEC_VLSDOI
- UNSPEC_VUPKHSB
+ UNSPEC_VUNPACK_HI_SIGN
+ UNSPEC_VUNPACK_LO_SIGN
UNSPEC_VUPKHPX
- UNSPEC_VUPKHSH
- UNSPEC_VUPKLSB
UNSPEC_VUPKLPX
- UNSPEC_VUPKLSH
UNSPEC_DST
UNSPEC_DSTT
UNSPEC_DSTST
@@ -134,6 +128,7 @@
UNSPEC_VUPKLS_V4SF
UNSPEC_VUPKHU_V4SF
UNSPEC_VUPKLU_V4SF
+ UNSPEC_VGBBD
])
(define_c_enum "unspecv"
@@ -146,6 +141,8 @@
;; Vec int modes
(define_mode_iterator VI [V4SI V8HI V16QI])
+;; Like VI, but add ISA 2.07 integer vector ops
+(define_mode_iterator VI2 [V4SI V8HI V16QI V2DI])
;; Short vec int modes
(define_mode_iterator VIshort [V8HI V16QI])
;; Vec float modes
@@ -159,8 +156,18 @@
;; Like VM, except don't do TImode
(define_mode_iterator VM2 [V4SI V8HI V16QI V4SF V2DF V2DI])
-(define_mode_attr VI_char [(V4SI "w") (V8HI "h") (V16QI "b")])
-(define_mode_attr VI_scalar [(V4SI "SI") (V8HI "HI") (V16QI "QI")])
+(define_mode_attr VI_char [(V2DI "d") (V4SI "w") (V8HI "h") (V16QI "b")])
+(define_mode_attr VI_scalar [(V2DI "DI") (V4SI "SI") (V8HI "HI") (V16QI "QI")])
+(define_mode_attr VI_unit [(V16QI "VECTOR_UNIT_ALTIVEC_P (V16QImode)")
+ (V8HI "VECTOR_UNIT_ALTIVEC_P (V8HImode)")
+ (V4SI "VECTOR_UNIT_ALTIVEC_P (V4SImode)")
+ (V2DI "VECTOR_UNIT_P8_VECTOR_P (V2DImode)")])
+
+;; Vector pack/unpack
+(define_mode_iterator VP [V2DI V4SI V8HI])
+(define_mode_attr VP_small [(V2DI "V4SI") (V4SI "V8HI") (V8HI "V16QI")])
+(define_mode_attr VP_small_lc [(V2DI "v4si") (V4SI "v8hi") (V8HI "v16qi")])
+(define_mode_attr VU_char [(V2DI "w") (V4SI "h") (V8HI "b")])
;; Vector move instructions.
(define_insn "*altivec_mov<mode>"
@@ -378,10 +385,10 @@
;; add
(define_insn "add<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (plus:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (plus:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vaddu<VI_char>m %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -398,17 +405,17 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VADDCUW))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vaddcuw %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "altivec_vaddu<VI_char>s"
[(set (match_operand:VI 0 "register_operand" "=v")
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")]
+ (match_operand:VI 2 "register_operand" "v")]
UNSPEC_VADDU))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "<VI_unit>"
"vaddu<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -418,16 +425,16 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VADDS))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vadds<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
;; sub
(define_insn "sub<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (minus:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (minus:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsubu<VI_char>m %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -444,7 +451,7 @@
(unspec:V4SI [(match_operand:V4SI 1 "register_operand" "v")
(match_operand:V4SI 2 "register_operand" "v")]
UNSPEC_VSUBCUW))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vsubcuw %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -454,7 +461,7 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VSUBU))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubu<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -464,7 +471,7 @@
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VSUBS))
(set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vsubs<VI_char>s %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -483,7 +490,7 @@
(unspec:VI [(match_operand:VI 1 "register_operand" "v")
(match_operand:VI 2 "register_operand" "v")]
UNSPEC_VAVGS))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
"vavgs<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -492,31 +499,31 @@
(unspec:V4SI [(match_operand:V4SF 1 "register_operand" "v")
(match_operand:V4SF 2 "register_operand" "v")]
UNSPEC_VCMPBFP))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_P (V4SImode)"
"vcmpbfp %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_eq<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (eq:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (eq:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpequ<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_gt<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (gt:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (gt:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpgts<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_gtu<mode>"
- [(set (match_operand:VI 0 "altivec_register_operand" "=v")
- (gtu:VI (match_operand:VI 1 "altivec_register_operand" "v")
- (match_operand:VI 2 "altivec_register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "altivec_register_operand" "=v")
+ (gtu:VI2 (match_operand:VI2 1 "altivec_register_operand" "v")
+ (match_operand:VI2 2 "altivec_register_operand" "v")))]
+ "<VI_unit>"
"vcmpgtu<VI_char> %0,%1,%2"
[(set_attr "type" "veccmp")])
@@ -744,18 +751,18 @@
;; max
(define_insn "umax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (umax:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmaxu<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "smax<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smax:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (smax:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmaxs<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -768,18 +775,18 @@
[(set_attr "type" "veccmp")])
(define_insn "umin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (umin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (umin:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vminu<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "smin<mode>3"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (smin:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (smin:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vmins<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -935,6 +942,31 @@
"vmrglw %0,%1,%2"
[(set_attr "type" "vecperm")])
+;; Power8 vector merge even/odd
+(define_insn "p8_vmrgew"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 0) (const_int 4)
+ (const_int 2) (const_int 6)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgew %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_vmrgow"
+ [(set (match_operand:V4SI 0 "register_operand" "=v")
+ (vec_select:V4SI
+ (vec_concat:V8SI
+ (match_operand:V4SI 1 "register_operand" "v")
+ (match_operand:V4SI 2 "register_operand" "v"))
+ (parallel [(const_int 1) (const_int 5)
+ (const_int 3) (const_int 7)])))]
+ "TARGET_P8_VECTOR"
+ "vmrgow %0,%1,%2"
+ [(set_attr "type" "vecperm")])
+
(define_insn "vec_widen_umult_even_v16qi"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")
@@ -1011,10 +1043,13 @@
;; logical ops. Have the logical ops follow the memory ops in
;; terms of whether to prefer VSX or Altivec
+;; AND has a clobber to be consistent with VSX, which adds splitters for using
+;; the GPR registers.
(define_insn "*altivec_and<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
(and:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v")))]
+ (match_operand:VM 2 "register_operand" "v")))
+ (clobber (match_scratch:CC 3 "=X"))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vand %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1044,8 +1079,8 @@
(define_insn "*altivec_nor<mode>3"
[(set (match_operand:VM 0 "register_operand" "=v")
- (not:VM (ior:VM (match_operand:VM 1 "register_operand" "v")
- (match_operand:VM 2 "register_operand" "v"))))]
+ (and:VM (not:VM (match_operand:VM 1 "register_operand" "v"))
+ (not:VM (match_operand:VM 2 "register_operand" "v"))))]
"VECTOR_MEM_ALTIVEC_P (<MODE>mode)"
"vnor %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1058,24 +1093,6 @@
"vandc %0,%1,%2"
[(set_attr "type" "vecsimple")])
-(define_insn "altivec_vpkuhum"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUM))]
- "TARGET_ALTIVEC"
- "vpkuhum %0,%1,%2"
- [(set_attr "type" "vecperm")])
-
-(define_insn "altivec_vpkuwum"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUM))]
- "TARGET_ALTIVEC"
- "vpkuwum %0,%1,%2"
- [(set_attr "type" "vecperm")])
-
(define_insn "altivec_vpkpx"
[(set (match_operand:V8HI 0 "register_operand" "=v")
(unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
@@ -1085,71 +1102,47 @@
"vpkpx %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkshss"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKSHSS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkshss %0,%1,%2"
- [(set_attr "type" "vecperm")])
-
-(define_insn "altivec_vpkswss"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKSWSS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkswss %0,%1,%2"
- [(set_attr "type" "vecperm")])
-
-(define_insn "altivec_vpkuhus"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkuhus %0,%1,%2"
+(define_insn "altivec_vpks<VI_char>ss"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_SIGN_SIGN_SAT))]
+ "<VI_unit>"
+ "vpks<VI_char>ss %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkshus"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKSHUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkshus %0,%1,%2"
+(define_insn "altivec_vpks<VI_char>us"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_SIGN_UNS_SAT))]
+ "<VI_unit>"
+ "vpks<VI_char>us %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkuwus"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkuwus %0,%1,%2"
+(define_insn "altivec_vpku<VI_char>us"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_SAT))]
+ "<VI_unit>"
+ "vpku<VI_char>us %0,%1,%2"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vpkswus"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKSWUS))
- (set (reg:SI 110) (unspec:SI [(const_int 0)] UNSPEC_SET_VSCR))]
- "TARGET_ALTIVEC"
- "vpkswus %0,%1,%2"
+(define_insn "altivec_vpku<VI_char>um"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_MOD))]
+ "<VI_unit>"
+ "vpku<VI_char>um %0,%1,%2"
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vrl<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (rotate:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (rotate:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vrl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1172,26 +1165,26 @@
[(set_attr "type" "vecperm")])
(define_insn "*altivec_vsl<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (ashift:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (ashift:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsl<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "*altivec_vsr<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (lshiftrt:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (lshiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsr<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
(define_insn "*altivec_vsra<VI_char>"
- [(set (match_operand:VI 0 "register_operand" "=v")
- (ashiftrt:VI (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v")))]
- "TARGET_ALTIVEC"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (ashiftrt:VI2 (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v")))]
+ "<VI_unit>"
"vsra<VI_char> %0,%1,%2"
[(set_attr "type" "vecsimple")])
@@ -1476,12 +1469,20 @@
"vsldoi %0,%1,%2,%3"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupkhsb"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKHSB))]
- "TARGET_ALTIVEC"
- "vupkhsb %0,%1"
+(define_insn "altivec_vupkhs<VU_char>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN))]
+ "<VI_unit>"
+ "vupkhs<VU_char> %0,%1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "altivec_vupkls<VU_char>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN))]
+ "<VI_unit>"
+ "vupkls<VU_char> %0,%1"
[(set_attr "type" "vecperm")])
(define_insn "altivec_vupkhpx"
@@ -1492,22 +1493,6 @@
"vupkhpx %0,%1"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupkhsh"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHSH))]
- "TARGET_ALTIVEC"
- "vupkhsh %0,%1"
- [(set_attr "type" "vecperm")])
-
-(define_insn "altivec_vupklsb"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKLSB))]
- "TARGET_ALTIVEC"
- "vupklsb %0,%1"
- [(set_attr "type" "vecperm")])
-
(define_insn "altivec_vupklpx"
[(set (match_operand:V4SI 0 "register_operand" "=v")
(unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
@@ -1516,49 +1501,41 @@
"vupklpx %0,%1"
[(set_attr "type" "vecperm")])
-(define_insn "altivec_vupklsh"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLSH))]
- "TARGET_ALTIVEC"
- "vupklsh %0,%1"
- [(set_attr "type" "vecperm")])
-
;; Compare vectors producing a vector result and a predicate, setting CR6 to
;; indicate a combined status
(define_insn "*altivec_vcmpequ<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(eq:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(eq:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (eq:VI (match_dup 1)
- (match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (eq:VI2 (match_dup 1)
+ (match_dup 2)))]
+ "<VI_unit>"
"vcmpequ<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_vcmpgts<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(gt:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(gt:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (gt:VI (match_dup 1)
- (match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (gt:VI2 (match_dup 1)
+ (match_dup 2)))]
+ "<VI_unit>"
"vcmpgts<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
(define_insn "*altivec_vcmpgtu<VI_char>_p"
[(set (reg:CC 74)
- (unspec:CC [(gtu:CC (match_operand:VI 1 "register_operand" "v")
- (match_operand:VI 2 "register_operand" "v"))]
+ (unspec:CC [(gtu:CC (match_operand:VI2 1 "register_operand" "v")
+ (match_operand:VI2 2 "register_operand" "v"))]
UNSPEC_PREDICATE))
- (set (match_operand:VI 0 "register_operand" "=v")
- (gtu:VI (match_dup 1)
- (match_dup 2)))]
- "VECTOR_UNIT_ALTIVEC_P (<MODE>mode)"
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (gtu:VI2 (match_dup 1)
+ (match_dup 2)))]
+ "<VI_unit>"
"vcmpgtu<VI_char>. %0,%1,%2"
[(set_attr "type" "veccmp")])
@@ -1779,20 +1756,28 @@
[(set_attr "type" "vecstore")])
;; Generate
-;; vspltis? SCRATCH0,0
+;; xxlxor/vxor SCRATCH0,SCRATCH0,SCRATCH0
;; vsubu?m SCRATCH2,SCRATCH1,%1
;; vmaxs? %0,%1,SCRATCH2"
(define_expand "abs<mode>2"
- [(set (match_dup 2) (vec_duplicate:VI (const_int 0)))
- (set (match_dup 3)
- (minus:VI (match_dup 2)
- (match_operand:VI 1 "register_operand" "v")))
- (set (match_operand:VI 0 "register_operand" "=v")
- (smax:VI (match_dup 1) (match_dup 3)))]
- "TARGET_ALTIVEC"
+ [(set (match_dup 2) (match_dup 3))
+ (set (match_dup 4)
+ (minus:VI2 (match_dup 2)
+ (match_operand:VI2 1 "register_operand" "v")))
+ (set (match_operand:VI2 0 "register_operand" "=v")
+ (smax:VI2 (match_dup 1) (match_dup 4)))]
+ "<VI_unit>"
{
- operands[2] = gen_reg_rtx (GET_MODE (operands[0]));
- operands[3] = gen_reg_rtx (GET_MODE (operands[0]));
+ int i, n_elt = GET_MODE_NUNITS (<MODE>mode);
+ rtvec v = rtvec_alloc (n_elt);
+
+ /* Create an all 0 constant. */
+ for (i = 0; i < n_elt; ++i)
+ RTVEC_ELT (v, i) = const0_rtx;
+
+ operands[2] = gen_reg_rtx (<MODE>mode);
+ operands[3] = gen_rtx_CONST_VECTOR (<MODE>mode, v);
+ operands[4] = gen_reg_rtx (<MODE>mode);
})
;; Generate
@@ -1950,49 +1935,19 @@
DONE;
}")
-(define_expand "vec_unpacks_hi_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKHSB))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupkhsb (operands[0], operands[1]));
- DONE;
-}")
-
-(define_expand "vec_unpacks_hi_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKHSH))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupkhsh (operands[0], operands[1]));
- DONE;
-}")
-
-(define_expand "vec_unpacks_lo_v16qi"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V16QI 1 "register_operand" "v")]
- UNSPEC_VUPKLSB))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupklsb (operands[0], operands[1]));
- DONE;
-}")
+(define_expand "vec_unpacks_hi_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_HI_SIGN))]
+ "<VI_unit>"
+ "")
-(define_expand "vec_unpacks_lo_v8hi"
- [(set (match_operand:V4SI 0 "register_operand" "=v")
- (unspec:V4SI [(match_operand:V8HI 1 "register_operand" "v")]
- UNSPEC_VUPKLSH))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vupklsh (operands[0], operands[1]));
- DONE;
-}")
+(define_expand "vec_unpacks_lo_<VP_small_lc>"
+ [(set (match_operand:VP 0 "register_operand" "=v")
+ (unspec:VP [(match_operand:<VP_small> 1 "register_operand" "v")]
+ UNSPEC_VUNPACK_LO_SIGN))]
+ "<VI_unit>"
+ "")
(define_insn "vperm_v8hiv4si"
[(set (match_operand:V4SI 0 "register_operand" "=v")
@@ -2291,29 +2246,13 @@
DONE;
}")
-(define_expand "vec_pack_trunc_v8hi"
- [(set (match_operand:V16QI 0 "register_operand" "=v")
- (unspec:V16QI [(match_operand:V8HI 1 "register_operand" "v")
- (match_operand:V8HI 2 "register_operand" "v")]
- UNSPEC_VPKUHUM))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vpkuhum (operands[0], operands[1], operands[2]));
- DONE;
-}")
-
-(define_expand "vec_pack_trunc_v4si"
- [(set (match_operand:V8HI 0 "register_operand" "=v")
- (unspec:V8HI [(match_operand:V4SI 1 "register_operand" "v")
- (match_operand:V4SI 2 "register_operand" "v")]
- UNSPEC_VPKUWUM))]
- "TARGET_ALTIVEC"
- "
-{
- emit_insn (gen_altivec_vpkuwum (operands[0], operands[1], operands[2]));
- DONE;
-}")
+(define_expand "vec_pack_trunc_<mode>"
+ [(set (match_operand:<VP_small> 0 "register_operand" "=v")
+ (unspec:<VP_small> [(match_operand:VP 1 "register_operand" "v")
+ (match_operand:VP 2 "register_operand" "v")]
+ UNSPEC_VPACK_UNS_UNS_MOD))]
+ "<VI_unit>"
+ "")
(define_expand "altivec_negv4sf2"
[(use (match_operand:V4SF 0 "register_operand" ""))
@@ -2460,3 +2399,34 @@
emit_insn (gen_altivec_vcfux (operands[0], tmp, const0_rtx));
DONE;
}")
+
+
+;; Power8 vector instructions encoded as Altivec instructions
+
+;; Vector count leading zeros
+(define_insn "*p8v_clz<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (clz:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vclz<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector population count
+(define_insn "*p8v_popcount<mode>2"
+ [(set (match_operand:VI2 0 "register_operand" "=v")
+ (popcount:VI2 (match_operand:VI2 1 "register_operand" "v")))]
+ "TARGET_P8_VECTOR"
+ "vpopcnt<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
+
+;; Vector Gather Bits by Bytes by Doubleword
+(define_insn "p8v_vgbbd"
+ [(set (match_operand:V16QI 0 "register_operand" "=v")
+ (unspec:V16QI [(match_operand:V16QI 1 "register_operand" "v")]
+ UNSPEC_VGBBD))]
+ "TARGET_P8_VECTOR"
+ "vgbbd %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "vecsimple")])
diff --git a/gcc/config/rs6000/constraints.md b/gcc/config/rs6000/constraints.md
index 463d69c6ba4..fa53cbb9de7 100644
--- a/gcc/config/rs6000/constraints.md
+++ b/gcc/config/rs6000/constraints.md
@@ -79,12 +79,31 @@
(define_register_constraint "wl" "rs6000_constraints[RS6000_CONSTRAINT_wl]"
"Floating point register if the LFIWAX instruction is enabled or NO_REGS.")
+(define_register_constraint "wm" "rs6000_constraints[RS6000_CONSTRAINT_wm]"
+ "VSX register if direct move instructions are enabled, or NO_REGS.")
+
+(define_register_constraint "wr" "rs6000_constraints[RS6000_CONSTRAINT_wr]"
+ "General purpose register if 64-bit instructions are enabled or NO_REGS.")
+
+(define_register_constraint "wv" "rs6000_constraints[RS6000_CONSTRAINT_wv]"
+ "Altivec register if -mpower8-vector is used or NO_REGS.")
+
(define_register_constraint "wx" "rs6000_constraints[RS6000_CONSTRAINT_wx]"
"Floating point register if the STFIWX instruction is enabled or NO_REGS.")
(define_register_constraint "wz" "rs6000_constraints[RS6000_CONSTRAINT_wz]"
"Floating point register if the LFIWZX instruction is enabled or NO_REGS.")
+;; NO_REGS register constraint, used to merge mov{sd,sf}, since movsd can use
+;; direct move directly, while movsf can't use direct move to move between
+;; the register sets.  There is a mode_attr that resolves to wm for SDmode
+;; and wn for SFmode.
+(define_register_constraint "wn" "NO_REGS")
+
+;; Lq/stq validates the address for load/store quad
+(define_memory_constraint "wQ"
+ "Memory operand suitable for the load/store quad instructions"
+ (match_operand 0 "quad_memory_operand"))
+
;; Altivec style load/store that ignores the bottom bits of the address
(define_memory_constraint "wZ"
"Indexed or indirect memory operand, ignoring the bottom 4 bits"
diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md
new file mode 100644
index 00000000000..9f7e4a1b255
--- /dev/null
+++ b/gcc/config/rs6000/crypto.md
@@ -0,0 +1,101 @@
+;; Cryptographic instructions added in ISA 2.07
+;; Copyright (C) 2012-2013 Free Software Foundation, Inc.
+;; Contributed by Michael Meissner (meissner@linux.vnet.ibm.com)
+
+;; This file is part of GCC.
+
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_c_enum "unspec"
+ [UNSPEC_VCIPHER
+ UNSPEC_VNCIPHER
+ UNSPEC_VCIPHERLAST
+ UNSPEC_VNCIPHERLAST
+ UNSPEC_VSBOX
+ UNSPEC_VSHASIGMA
+ UNSPEC_VPERMXOR
+ UNSPEC_VPMSUM])
+
+;; Iterator for VPMSUM/VPERMXOR
+(define_mode_iterator CR_mode [V16QI V8HI V4SI V2DI])
+
+(define_mode_attr CR_char [(V16QI "b")
+ (V8HI "h")
+ (V4SI "w")
+ (V2DI "d")])
+
+;; Iterator for VSHASIGMAD/VSHASIGMAW
+(define_mode_iterator CR_hash [V4SI V2DI])
+
+;; Iterator for the other crypto functions
+(define_int_iterator CR_code [UNSPEC_VCIPHER
+ UNSPEC_VNCIPHER
+ UNSPEC_VCIPHERLAST
+ UNSPEC_VNCIPHERLAST])
+
+(define_int_attr CR_insn [(UNSPEC_VCIPHER "vcipher")
+ (UNSPEC_VNCIPHER "vncipher")
+ (UNSPEC_VCIPHERLAST "vcipherlast")
+ (UNSPEC_VNCIPHERLAST "vncipherlast")])
+
+;; 2 operand crypto instructions
+(define_insn "crypto_<CR_insn>"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")
+ (match_operand:V2DI 2 "register_operand" "v")]
+ CR_code))]
+ "TARGET_CRYPTO"
+ "<CR_insn> %0,%1,%2"
+ [(set_attr "type" "crypto")])
+
+(define_insn "crypto_vpmsum<CR_char>"
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
+ (match_operand:CR_mode 2 "register_operand" "v")]
+ UNSPEC_VPMSUM))]
+ "TARGET_CRYPTO"
+ "vpmsum<CR_char> %0,%1,%2"
+ [(set_attr "type" "crypto")])
+
+;; 3 operand crypto instructions
+(define_insn "crypto_vpermxor_<mode>"
+ [(set (match_operand:CR_mode 0 "register_operand" "=v")
+ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v")
+ (match_operand:CR_mode 2 "register_operand" "v")
+ (match_operand:CR_mode 3 "register_operand" "v")]
+ UNSPEC_VPERMXOR))]
+ "TARGET_CRYPTO"
+ "vpermxor %0,%1,%2,%3"
+ [(set_attr "type" "crypto")])
+
+;; 1 operand crypto instruction
+(define_insn "crypto_vsbox"
+ [(set (match_operand:V2DI 0 "register_operand" "=v")
+ (unspec:V2DI [(match_operand:V2DI 1 "register_operand" "v")]
+ UNSPEC_VSBOX))]
+ "TARGET_CRYPTO"
+ "vsbox %0,%1"
+ [(set_attr "type" "crypto")])
+
+;; Hash crypto instructions
+(define_insn "crypto_vshasigma<CR_char>"
+ [(set (match_operand:CR_hash 0 "register_operand" "=v")
+ (unspec:CR_hash [(match_operand:CR_hash 1 "register_operand" "v")
+ (match_operand:SI 2 "const_0_to_1_operand" "n")
+ (match_operand:SI 3 "const_0_to_15_operand" "n")]
+ UNSPEC_VSHASIGMA))]
+ "TARGET_CRYPTO"
+ "vshasigma<CR_char> %0,%1,%2,%3"
+ [(set_attr "type" "crypto")])
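(Editor's sketch, not part of the patch: the crypto patterns above back the __builtin_crypto_* functions registered later in rs6000-builtin.def. A minimal use, assuming the unsigned V2DI prototypes and -mcrypto, which -mcpu=power8 implies.)

/* Illustration only: one AES round via the new vcipher/vcipherlast patterns.  */
#include <altivec.h>

vector unsigned long long
aes_encrypt_round (vector unsigned long long state,
                   vector unsigned long long round_key)
{
  /* SubBytes/ShiftRows/MixColumns + AddRoundKey in one instruction.  */
  return __builtin_crypto_vcipher (state, round_key);
}

vector unsigned long long
aes_final_round (vector unsigned long long state,
                 vector unsigned long long round_key)
{
  return __builtin_crypto_vcipherlast (state, round_key);
}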
diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c
index e608dce184c..1a173d0b1cc 100644
--- a/gcc/config/rs6000/driver-rs6000.c
+++ b/gcc/config/rs6000/driver-rs6000.c
@@ -167,7 +167,7 @@ elf_platform (void)
if (fd != -1)
{
- char buf[1024];
+ static char buf[1024];
ElfW(auxv_t) *av;
ssize_t n;
diff --git a/gcc/config/rs6000/linux64.h b/gcc/config/rs6000/linux64.h
index 3f280581feb..79f0f0b5f00 100644
--- a/gcc/config/rs6000/linux64.h
+++ b/gcc/config/rs6000/linux64.h
@@ -136,8 +136,11 @@ extern int dot_symbols;
SET_CMODEL (CMODEL_MEDIUM); \
if (rs6000_current_cmodel != CMODEL_SMALL) \
{ \
- TARGET_NO_FP_IN_TOC = 0; \
- TARGET_NO_SUM_IN_TOC = 0; \
+ if (!global_options_set.x_TARGET_NO_FP_IN_TOC) \
+ TARGET_NO_FP_IN_TOC \
+ = rs6000_current_cmodel == CMODEL_MEDIUM; \
+ if (!global_options_set.x_TARGET_NO_SUM_IN_TOC) \
+ TARGET_NO_SUM_IN_TOC = 0; \
} \
} \
} \
diff --git a/gcc/config/rs6000/power8.md b/gcc/config/rs6000/power8.md
new file mode 100644
index 00000000000..83bf7197483
--- /dev/null
+++ b/gcc/config/rs6000/power8.md
@@ -0,0 +1,373 @@
+;; Scheduling description for IBM POWER8 processor.
+;; Copyright (C) 2013 Free Software Foundation, Inc.
+;;
+;; Contributed by Pat Haugen (pthaugen@us.ibm.com).
+
+;; This file is part of GCC.
+;;
+;; GCC is free software; you can redistribute it and/or modify it
+;; under the terms of the GNU General Public License as published
+;; by the Free Software Foundation; either version 3, or (at your
+;; option) any later version.
+;;
+;; GCC is distributed in the hope that it will be useful, but WITHOUT
+;; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+;; or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+;; License for more details.
+;;
+;; You should have received a copy of the GNU General Public License
+;; along with GCC; see the file COPYING3. If not see
+;; <http://www.gnu.org/licenses/>.
+
+(define_automaton "power8fxu,power8lsu,power8vsu,power8misc")
+
+(define_cpu_unit "fxu0_power8,fxu1_power8" "power8fxu")
+(define_cpu_unit "lu0_power8,lu1_power8" "power8lsu")
+(define_cpu_unit "lsu0_power8,lsu1_power8" "power8lsu")
+(define_cpu_unit "vsu0_power8,vsu1_power8" "power8vsu")
+(define_cpu_unit "bpu_power8,cru_power8" "power8misc")
+(define_cpu_unit "du0_power8,du1_power8,du2_power8,du3_power8,du4_power8,\
+ du5_power8,du6_power8" "power8misc")
+
+
+; Dispatch group reservations
+(define_reservation "DU_any_power8"
+ "du0_power8|du1_power8|du2_power8|du3_power8|du4_power8|\
+ du5_power8")
+
+; 2-way Cracked instructions go in slots 0-1
+; (can also have a second in slots 3-4 if insns are adjacent)
+(define_reservation "DU_cracked_power8"
+ "du0_power8+du1_power8")
+
+; Insns that are first in group
+(define_reservation "DU_first_power8"
+ "du0_power8")
+
+; Insns that are first and last in group
+(define_reservation "DU_both_power8"
+ "du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+\
+ du5_power8+du6_power8")
+
+; Dispatch slots are allocated in order conforming to program order.
+(absence_set "du0_power8" "du1_power8,du2_power8,du3_power8,du4_power8,\
+ du5_power8,du6_power8")
+(absence_set "du1_power8" "du2_power8,du3_power8,du4_power8,du5_power8,\
+ du6_power8")
+(absence_set "du2_power8" "du3_power8,du4_power8,du5_power8,du6_power8")
+(absence_set "du3_power8" "du4_power8,du5_power8,du6_power8")
+(absence_set "du4_power8" "du5_power8,du6_power8")
+(absence_set "du5_power8" "du6_power8")
+
+
+; Execution unit reservations
+(define_reservation "FXU_power8"
+ "fxu0_power8|fxu1_power8")
+
+(define_reservation "LU_power8"
+ "lu0_power8|lu1_power8")
+
+(define_reservation "LSU_power8"
+ "lsu0_power8|lsu1_power8")
+
+(define_reservation "LU_or_LSU_power8"
+ "lu0_power8|lu1_power8|lsu0_power8|lsu1_power8")
+
+(define_reservation "VSU_power8"
+ "vsu0_power8|vsu1_power8")
+
+
+; LS Unit
+(define_insn_reservation "power8-load" 3
+ (and (eq_attr "type" "load")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LU_or_LSU_power8")
+
+(define_insn_reservation "power8-load-update" 3
+ (and (eq_attr "type" "load_u,load_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_or_LSU_power8+FXU_power8")
+
+(define_insn_reservation "power8-load-ext" 3
+ (and (eq_attr "type" "load_ext")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_or_LSU_power8,FXU_power8")
+
+(define_insn_reservation "power8-load-ext-update" 3
+ (and (eq_attr "type" "load_ext_u,load_ext_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LU_or_LSU_power8+FXU_power8,FXU_power8")
+
+(define_insn_reservation "power8-fpload" 5
+ (and (eq_attr "type" "fpload,vecload")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LU_power8")
+
+(define_insn_reservation "power8-fpload-update" 5
+ (and (eq_attr "type" "fpload_u,fpload_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LU_power8+FXU_power8")
+
+(define_insn_reservation "power8-store" 5 ; store-forwarding latency
+ (and (eq_attr "type" "store,store_u")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-store-update-indexed" 5
+ (and (eq_attr "type" "store_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-fpstore" 5
+ (and (eq_attr "type" "fpstore")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-fpstore-update" 5
+ (and (eq_attr "type" "fpstore_u,fpstore_ux")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-vecstore" 5
+ (and (eq_attr "type" "vecstore")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,LSU_power8+VSU_power8")
+
+(define_insn_reservation "power8-larx" 3
+ (and (eq_attr "type" "load_l")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LU_or_LSU_power8")
+
+(define_insn_reservation "power8-stcx" 10
+ (and (eq_attr "type" "store_c")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LSU_power8+LU_power8")
+
+(define_insn_reservation "power8-sync" 1
+ (and (eq_attr "type" "sync,isync")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,LSU_power8")
+
+
+; FX Unit
+(define_insn_reservation "power8-1cyc" 1
+ (and (eq_attr "type" "integer,insert_word,insert_dword,shift,trap,\
+ var_shift_rotate,exts,isel")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 2 "power8-1cyc"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+; "power8-load,power8-load-update,power8-load-ext,\
+; power8-load-ext-update,power8-fpload,power8-fpload-update,\
+; power8-store,power8-store-update,power8-store-update-indexed,\
+; power8-fpstore,power8-fpstore-update,power8-vecstore,\
+; power8-larx,power8-stcx")
+
+(define_insn_reservation "power8-2cyc" 2
+ (and (eq_attr "type" "cntlz,popcnt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+(define_insn_reservation "power8-two" 2
+ (and (eq_attr "type" "two")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8+DU_any_power8,FXU_power8,FXU_power8")
+
+(define_insn_reservation "power8-three" 3
+ (and (eq_attr "type" "three")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8+DU_any_power8+DU_any_power8,FXU_power8,FXU_power8,FXU_power8")
+
+; cmp - Normal compare insns
+(define_insn_reservation "power8-cmp" 2
+ (and (eq_attr "type" "cmp")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; fast_compare : add./and./nor./etc
+(define_insn_reservation "power8-fast-compare" 2
+ (and (eq_attr "type" "fast_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+; compare : rldicl./exts./etc
+; delayed_compare : rlwinm./slwi./etc
+; var_delayed_compare : rlwnm./slw./etc
+(define_insn_reservation "power8-compare" 2
+ (and (eq_attr "type" "compare,delayed_compare,var_delayed_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,FXU_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 3 "power8-fast-compare,power8-compare"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+
+; 5 cycle CR latency
+(define_bypass 5 "power8-fast-compare,power8-compare"
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
+
+(define_insn_reservation "power8-mul" 4
+ (and (eq_attr "type" "imul,imul2,imul3,lmul")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,FXU_power8")
+
+(define_insn_reservation "power8-mul-compare" 4
+ (and (eq_attr "type" "imul_compare,lmul_compare")
+ (eq_attr "cpu" "power8"))
+ "DU_cracked_power8,FXU_power8")
+
+; Extra cycle to LU/LSU
+(define_bypass 5 "power8-mul,power8-mul-compare"
+ "power8-load*,power8-fpload*,power8-store*,power8-fpstore*,\
+ power8-vecstore,power8-larx,power8-stcx")
+
+; 7 cycle CR latency
+(define_bypass 7 "power8-mul,power8-mul-compare"
+ "power8-crlogical,power8-mfcr,power8-mfcrf,power8-branch")
+
+; FXU divides are not pipelined
+(define_insn_reservation "power8-idiv" 37
+ (and (eq_attr "type" "idiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,fxu0_power8*37|fxu1_power8*37")
+
+(define_insn_reservation "power8-ldiv" 68
+ (and (eq_attr "type" "ldiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,fxu0_power8*68|fxu1_power8*68")
+
+(define_insn_reservation "power8-mtjmpr" 5
+ (and (eq_attr "type" "mtjmpr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,FXU_power8")
+
+; Should differentiate between 1 cr field and > 1 since mtocrf is not microcoded
+(define_insn_reservation "power8-mtcr" 3
+ (and (eq_attr "type" "mtcr")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,FXU_power8")
+
+
+; CR Unit
+(define_insn_reservation "power8-mfjmpr" 5
+ (and (eq_attr "type" "mfjmpr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8+FXU_power8")
+
+(define_insn_reservation "power8-crlogical" 3
+ (and (eq_attr "type" "cr_logical,delayed_cr")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8")
+
+(define_insn_reservation "power8-mfcr" 5
+ (and (eq_attr "type" "mfcr")
+ (eq_attr "cpu" "power8"))
+ "DU_both_power8,cru_power8")
+
+(define_insn_reservation "power8-mfcrf" 3
+ (and (eq_attr "type" "mfcrf")
+ (eq_attr "cpu" "power8"))
+ "DU_first_power8,cru_power8")
+
+
+; BR Unit
+; Branches take dispatch slot 7, but reserve any remaining prior slots to
+; prevent other insns from grabbing them once this is assigned.
+(define_insn_reservation "power8-branch" 3
+ (and (eq_attr "type" "jmpreg,branch")
+ (eq_attr "cpu" "power8"))
+ "(du6_power8\
+ |du5_power8+du6_power8\
+ |du4_power8+du5_power8+du6_power8\
+ |du3_power8+du4_power8+du5_power8+du6_power8\
+ |du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
+ |du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+du6_power8\
+ |du0_power8+du1_power8+du2_power8+du3_power8+du4_power8+du5_power8+\
+ du6_power8),bpu_power8")
+
+; Branch updating LR/CTR feeding mf[lr|ctr]
+(define_bypass 4 "power8-branch" "power8-mfjmpr")
+
+
+; VS Unit (includes FP/VSX/VMX/DFP/Crypto)
+(define_insn_reservation "power8-fp" 6
+ (and (eq_attr "type" "fp,dmul")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+; Additional 3 cycles for any CR result
+(define_bypass 9 "power8-fp" "power8-crlogical,power8-mfcr*,power8-branch")
+
+(define_insn_reservation "power8-fpcompare" 8
+ (and (eq_attr "type" "fpcompare")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-sdiv" 27
+ (and (eq_attr "type" "sdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-ddiv" 33
+ (and (eq_attr "type" "ddiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-sqrt" 32
+ (and (eq_attr "type" "ssqrt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-dsqrt" 44
+ (and (eq_attr "type" "dsqrt")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecsimple" 2
+ (and (eq_attr "type" "vecperm,vecsimple,veccmp")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecnormal" 6
+ (and (eq_attr "type" "vecfloat,vecdouble")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_bypass 7 "power8-vecnormal"
+ "power8-vecsimple,power8-veccomplex,power8-fpstore*,\
+ power8-vecstore")
+
+(define_insn_reservation "power8-veccomplex" 7
+ (and (eq_attr "type" "veccomplex")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecfdiv" 25
+ (and (eq_attr "type" "vecfdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-vecdiv" 31
+ (and (eq_attr "type" "vecdiv")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-mffgpr" 5
+ (and (eq_attr "type" "mffgpr")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-mftgpr" 6
+ (and (eq_attr "type" "mftgpr")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
+(define_insn_reservation "power8-crypto" 7
+ (and (eq_attr "type" "crypto")
+ (eq_attr "cpu" "power8"))
+ "DU_any_power8,VSU_power8")
+
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 78ec1b20913..f47967a48aa 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -166,6 +166,11 @@
(and (match_code "const_int")
(match_test "IN_RANGE (INTVAL (op), 2, 3)")))
+;; Match op = 0..15
+(define_predicate "const_0_to_15_operand"
+ (and (match_code "const_int")
+ (match_test "IN_RANGE (INTVAL (op), 0, 15)")))
+
;; Return 1 if op is a register that is not special.
(define_predicate "gpc_reg_operand"
(match_operand 0 "register_operand")
@@ -182,9 +187,68 @@
if (REGNO (op) >= ARG_POINTER_REGNUM && !CA_REGNO_P (REGNO (op)))
return 1;
+ if (TARGET_VSX && VSX_REGNO_P (REGNO (op)))
+ return 1;
+
return INT_REGNO_P (REGNO (op)) || FP_REGNO_P (REGNO (op));
})
+;; Return 1 if op is a general purpose register. Unlike gpc_reg_operand, don't
+;; allow floating point or vector registers.
+(define_predicate "int_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ if ((TARGET_E500_DOUBLE || TARGET_SPE) && invalid_e500_subreg (op, mode))
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ if (REGNO (op) >= FIRST_PSEUDO_REGISTER)
+ return 1;
+
+ return INT_REGNO_P (REGNO (op));
+})
+
+;; Like int_reg_operand, but only return true for base registers
+(define_predicate "base_reg_operand"
+ (match_operand 0 "int_reg_operand")
+{
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ return (REGNO (op) != FIRST_GPR_REGNO);
+})
+
+;; Return 1 if op is a general purpose register that is an even register,
+;; which is suitable for a load/store quad operation.
+(define_predicate "quad_int_reg_operand"
+ (match_operand 0 "register_operand")
+{
+ HOST_WIDE_INT r;
+
+ if (!TARGET_QUAD_MEMORY)
+ return 0;
+
+ if (GET_CODE (op) == SUBREG)
+ op = SUBREG_REG (op);
+
+ if (!REG_P (op))
+ return 0;
+
+ r = REGNO (op);
+ if (r >= FIRST_PSEUDO_REGISTER)
+ return 1;
+
+ return (INT_REGNO_P (r) && ((r & 1) == 0));
+})
+
;; Return 1 if op is a register that is a condition register field.
(define_predicate "cc_reg_operand"
(match_operand 0 "register_operand")
@@ -302,6 +366,11 @@
& (~ (unsigned HOST_WIDE_INT) 0xffffffff)) == 0)")
(match_operand 0 "gpc_reg_operand")))
+;; Like reg_or_logical_cint_operand, but allow vsx registers
+(define_predicate "vsx_reg_or_cint_operand"
+ (ior (match_operand 0 "vsx_register_operand")
+ (match_operand 0 "reg_or_logical_cint_operand")))
+
;; Return 1 if operand is a CONST_DOUBLE that can be set in a register
;; with no more than one instruction per word.
(define_predicate "easy_fp_constant"
@@ -458,9 +527,11 @@
(match_test "easy_altivec_constant (op, mode)")))
{
HOST_WIDE_INT val;
+ int elt;
if (mode == V2DImode || mode == V2DFmode)
return 0;
- val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1);
+ elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0;
+ val = const_vector_elt_as_int (op, elt);
val = ((val & 0xff) ^ 0x80) - 0x80;
return EASY_VECTOR_15_ADD_SELF (val);
})
@@ -472,9 +543,11 @@
(match_test "easy_altivec_constant (op, mode)")))
{
HOST_WIDE_INT val;
+ int elt;
if (mode == V2DImode || mode == V2DFmode)
return 0;
- val = const_vector_elt_as_int (op, GET_MODE_NUNITS (mode) - 1);
+ elt = BYTES_BIG_ENDIAN ? GET_MODE_NUNITS (mode) - 1 : 0;
+ val = const_vector_elt_as_int (op, elt);
return EASY_VECTOR_MSB (val, GET_MODE_INNER (mode));
})
@@ -507,6 +580,54 @@
(and (match_operand 0 "memory_operand")
(match_test "offsettable_nonstrict_memref_p (op)")))
+;; Return 1 if the operand is suitable for load/store quad memory.
+(define_predicate "quad_memory_operand"
+ (match_code "mem")
+{
+ rtx addr, op0, op1;
+ int ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = 0;
+
+ else if (!memory_operand (op, mode))
+ ret = 0;
+
+ else if (GET_MODE_SIZE (GET_MODE (op)) != 16)
+ ret = 0;
+
+ else if (MEM_ALIGN (op) < 128)
+ ret = 0;
+
+ else
+ {
+ addr = XEXP (op, 0);
+ if (int_reg_operand (addr, Pmode))
+ ret = 1;
+
+ else if (GET_CODE (addr) != PLUS)
+ ret = 0;
+
+ else
+ {
+ op0 = XEXP (addr, 0);
+ op1 = XEXP (addr, 1);
+ ret = (int_reg_operand (op0, Pmode)
+ && GET_CODE (op1) == CONST_INT
+ && IN_RANGE (INTVAL (op1), -32768, 32767)
+ && (INTVAL (op1) & 15) == 0);
+ }
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\nquad_memory_operand, ret = %s\n", ret ? "true" : "false");
+ debug_rtx (op);
+ }
+
+ return ret;
+})
+
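(Editor's note, not part of the patch: the reg+offset case in quad_memory_operand above reduces to a signed 16-bit displacement that is also a multiple of 16. A standalone restatement of that test, for illustration only:)

/* Hedged restatement of the displacement check above.  */
#include <stdbool.h>

static bool
valid_quad_displacement (long offset)
{
  return offset >= -32768 && offset <= 32767 && (offset & 15) == 0;
}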
;; Return 1 if the operand is an indexed or indirect memory operand.
(define_predicate "indexed_or_indirect_operand"
(match_code "mem")
@@ -521,6 +642,19 @@
return indexed_or_indirect_address (op, mode);
})
+;; Like indexed_or_indirect_operand, but also allow a GPR register if direct
+;; moves are supported.
+(define_predicate "reg_or_indexed_operand"
+ (match_code "mem,reg")
+{
+ if (MEM_P (op))
+ return indexed_or_indirect_operand (op, mode);
+ else if (TARGET_DIRECT_MOVE)
+ return register_operand (op, mode);
+  return 0;
+})
+
;; Return 1 if the operand is an indexed or indirect memory operand with an
;; AND -16 in it, used to recognize when we need to switch to Altivec loads
;; to realign loops instead of VSX (altivec silently ignores the bottom bits,
@@ -991,9 +1125,16 @@
GET_MODE (XEXP (op, 0))),
1"))))
+;; Return 1 if OP is a valid comparison operator for "cbranch" instructions.
+;; If we're assuming that FP operations cannot generate user-visible traps,
+;; then on e500 we can use the ordered-signaling instructions to implement
+;; the unordered-quiet FP comparison predicates modulo a reversal.
(define_predicate "rs6000_cbranch_operator"
(if_then_else (match_test "TARGET_HARD_FLOAT && !TARGET_FPRS")
- (match_operand 0 "ordered_comparison_operator")
+ (if_then_else (match_test "flag_trapping_math")
+ (match_operand 0 "ordered_comparison_operator")
+ (ior (match_operand 0 "ordered_comparison_operator")
+ (match_code ("unlt,unle,ungt,unge"))))
(match_operand 0 "comparison_operator")))
;; Return 1 if OP is a comparison operation that is valid for an SCC insn --
diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def
index a545fe3e448..1a5a709751d 100644
--- a/gcc/config/rs6000/rs6000-builtin.def
+++ b/gcc/config/rs6000/rs6000-builtin.def
@@ -30,7 +30,7 @@
RS6000_BUILTIN_A -- ABS builtins
RS6000_BUILTIN_D -- DST builtins
RS6000_BUILTIN_E -- SPE EVSEL builtins.
- RS6000_BUILTIN_P -- Altivec and VSX predicate builtins
+ RS6000_BUILTIN_P -- Altivec, VSX, ISA 2.07 vector predicate builtins
RS6000_BUILTIN_Q -- Paired floating point VSX predicate builtins
RS6000_BUILTIN_S -- SPE predicate builtins
RS6000_BUILTIN_X -- special builtins
@@ -301,6 +301,108 @@
| RS6000_BTC_SPECIAL), \
CODE_FOR_nothing) /* ICODE */
+/* ISA 2.07 (power8) vector convenience macros. */
+/* For the instructions that are encoded as altivec instructions use
+ __builtin_altivec_ as the builtin name. */
+#define BU_P8V_AV_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_AV_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_AV_P(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_P (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_altivec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_PREDICATE), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+/* For the instructions encoded as VSX instructions use __builtin_vsx_ as the
+   builtin name. */
+#define BU_P8V_VSX_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_vsx_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_P8V_OVERLOAD_1(ENUM, NAME) \
+ RS6000_BUILTIN_1 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_P8V_OVERLOAD_2(ENUM, NAME) \
+ RS6000_BUILTIN_2 (P8V_BUILTIN_VEC_ ## ENUM, /* ENUM */ \
+ "__builtin_vec_" NAME, /* NAME */ \
+ RS6000_BTM_P8_VECTOR, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+/* Crypto convenience macros. */
+#define BU_CRYPTO_1(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_2(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_ ## ATTR /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_ ## ICODE) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \
+ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_UNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \
+ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_BINARY), \
+ CODE_FOR_nothing) /* ICODE */
+
+#define BU_CRYPTO_OVERLOAD_3(ENUM, NAME) \
+ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \
+ "__builtin_crypto_" NAME, /* NAME */ \
+ RS6000_BTM_CRYPTO, /* MASK */ \
+ (RS6000_BTC_OVERLOADED /* ATTR */ \
+ | RS6000_BTC_TERNARY), \
+ CODE_FOR_nothing) /* ICODE */
+
/* SPE convenience macros. */
#define BU_SPE_1(ENUM, NAME, ATTR, ICODE) \
RS6000_BUILTIN_1 (SPE_BUILTIN_ ## ENUM, /* ENUM */ \
@@ -1012,7 +1114,7 @@ BU_VSX_1 (XVTSQRTSP_FG, "xvtsqrtsp_fg", CONST, vsx_tsqrtv4sf2_fg)
BU_VSX_1 (XVRESP, "xvresp", CONST, vsx_frev4sf2)
BU_VSX_1 (XSCVDPSP, "xscvdpsp", CONST, vsx_xscvdpsp)
-BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvdpsp)
+BU_VSX_1 (XSCVSPDP, "xscvspdp", CONST, vsx_xscvspdp)
BU_VSX_1 (XVCVDPSP, "xvcvdpsp", CONST, vsx_xvcvdpsp)
BU_VSX_1 (XVCVSPDP, "xvcvspdp", CONST, vsx_xvcvspdp)
BU_VSX_1 (XSTSQRTDP_FE, "xstsqrtdp_fe", CONST, vsx_tsqrtdf2_fe)
@@ -1132,6 +1234,139 @@ BU_VSX_OVERLOAD_2 (XXSPLTW, "xxspltw")
BU_VSX_OVERLOAD_X (LD, "ld")
BU_VSX_OVERLOAD_X (ST, "st")
+/* 1 argument VSX instructions added in ISA 2.07. */
+BU_P8V_VSX_1 (XSCVSPDPN, "xscvspdpn", CONST, vsx_xscvspdpn)
+BU_P8V_VSX_1 (XSCVDPSPN, "xscvdpspn", CONST, vsx_xscvdpspn)
+
+/* 1 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_1 (ABS_V2DI, "abs_v2di", CONST, absv2di2)
+BU_P8V_AV_1 (VUPKHSW, "vupkhsw", CONST, altivec_vupkhsw)
+BU_P8V_AV_1 (VUPKLSW, "vupklsw", CONST, altivec_vupklsw)
+BU_P8V_AV_1 (VCLZB, "vclzb", CONST, clzv16qi2)
+BU_P8V_AV_1 (VCLZH, "vclzh", CONST, clzv8hi2)
+BU_P8V_AV_1 (VCLZW, "vclzw", CONST, clzv4si2)
+BU_P8V_AV_1 (VCLZD, "vclzd", CONST, clzv2di2)
+BU_P8V_AV_1 (VPOPCNTB, "vpopcntb", CONST, popcountv16qi2)
+BU_P8V_AV_1 (VPOPCNTH, "vpopcnth", CONST, popcountv8hi2)
+BU_P8V_AV_1 (VPOPCNTW, "vpopcntw", CONST, popcountv4si2)
+BU_P8V_AV_1 (VPOPCNTD, "vpopcntd", CONST, popcountv2di2)
+BU_P8V_AV_1 (VGBBD, "vgbbd", CONST, p8v_vgbbd)
+
+/* 2 argument altivec instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VADDUDM, "vaddudm", CONST, addv2di3)
+BU_P8V_AV_2 (VMINSD, "vminsd", CONST, sminv2di3)
+BU_P8V_AV_2 (VMAXSD, "vmaxsd", CONST, smaxv2di3)
+BU_P8V_AV_2 (VMINUD, "vminud", CONST, uminv2di3)
+BU_P8V_AV_2 (VMAXUD, "vmaxud", CONST, umaxv2di3)
+BU_P8V_AV_2 (VMRGEW, "vmrgew", CONST, p8_vmrgew)
+BU_P8V_AV_2 (VMRGOW, "vmrgow", CONST, p8_vmrgow)
+BU_P8V_AV_2 (VPKUDUM, "vpkudum", CONST, altivec_vpkudum)
+BU_P8V_AV_2 (VPKSDSS, "vpksdss", CONST, altivec_vpksdss)
+BU_P8V_AV_2 (VPKUDUS, "vpkudus", CONST, altivec_vpkudus)
+BU_P8V_AV_2 (VPKSDUS, "vpksdus", CONST, altivec_vpkswus)
+BU_P8V_AV_2 (VRLD, "vrld", CONST, vrotlv2di3)
+BU_P8V_AV_2 (VSLD, "vsld", CONST, vashlv2di3)
+BU_P8V_AV_2 (VSRD, "vsrd", CONST, vlshrv2di3)
+BU_P8V_AV_2 (VSRAD, "vsrad", CONST, vashrv2di3)
+BU_P8V_AV_2 (VSUBUDM, "vsubudm", CONST, subv2di3)
+
+BU_P8V_AV_2 (EQV_V16QI, "eqv_v16qi", CONST, eqvv16qi3)
+BU_P8V_AV_2 (EQV_V8HI, "eqv_v8hi", CONST, eqvv8hi3)
+BU_P8V_AV_2 (EQV_V4SI, "eqv_v4si", CONST, eqvv4si3)
+BU_P8V_AV_2 (EQV_V2DI, "eqv_v2di", CONST, eqvv2di3)
+BU_P8V_AV_2 (EQV_V4SF, "eqv_v4sf", CONST, eqvv4sf3)
+BU_P8V_AV_2 (EQV_V2DF, "eqv_v2df", CONST, eqvv2df3)
+
+BU_P8V_AV_2 (NAND_V16QI, "nand_v16qi", CONST, nandv16qi3)
+BU_P8V_AV_2 (NAND_V8HI, "nand_v8hi", CONST, nandv8hi3)
+BU_P8V_AV_2 (NAND_V4SI, "nand_v4si", CONST, nandv4si3)
+BU_P8V_AV_2 (NAND_V2DI, "nand_v2di", CONST, nandv2di3)
+BU_P8V_AV_2 (NAND_V4SF, "nand_v4sf", CONST, nandv4sf3)
+BU_P8V_AV_2 (NAND_V2DF, "nand_v2df", CONST, nandv2df3)
+
+BU_P8V_AV_2 (ORC_V16QI, "orc_v16qi", CONST, orcv16qi3)
+BU_P8V_AV_2 (ORC_V8HI, "orc_v8hi", CONST, orcv8hi3)
+BU_P8V_AV_2 (ORC_V4SI, "orc_v4si", CONST, orcv4si3)
+BU_P8V_AV_2 (ORC_V2DI, "orc_v2di", CONST, orcv2di3)
+BU_P8V_AV_2 (ORC_V4SF, "orc_v4sf", CONST, orcv4sf3)
+BU_P8V_AV_2 (ORC_V2DF, "orc_v2df", CONST, orcv2df3)
+
+/* Vector comparison instructions added in ISA 2.07. */
+BU_P8V_AV_2 (VCMPEQUD, "vcmpequd", CONST, vector_eqv2di)
+BU_P8V_AV_2 (VCMPGTSD, "vcmpgtsd", CONST, vector_gtv2di)
+BU_P8V_AV_2 (VCMPGTUD, "vcmpgtud", CONST, vector_gtuv2di)
+
+/* Vector comparison predicate instructions added in ISA 2.07. */
+BU_P8V_AV_P (VCMPEQUD_P, "vcmpequd_p", CONST, vector_eq_v2di_p)
+BU_P8V_AV_P (VCMPGTSD_P, "vcmpgtsd_p", CONST, vector_gt_v2di_p)
+BU_P8V_AV_P (VCMPGTUD_P, "vcmpgtud_p", CONST, vector_gtu_v2di_p)
+
+/* ISA 2.07 vector overloaded 1 argument functions. */
+BU_P8V_OVERLOAD_1 (VUPKHSW, "vupkhsw")
+BU_P8V_OVERLOAD_1 (VUPKLSW, "vupklsw")
+BU_P8V_OVERLOAD_1 (VCLZ, "vclz")
+BU_P8V_OVERLOAD_1 (VCLZB, "vclzb")
+BU_P8V_OVERLOAD_1 (VCLZH, "vclzh")
+BU_P8V_OVERLOAD_1 (VCLZW, "vclzw")
+BU_P8V_OVERLOAD_1 (VCLZD, "vclzd")
+BU_P8V_OVERLOAD_1 (VPOPCNT, "vpopcnt")
+BU_P8V_OVERLOAD_1 (VPOPCNTB, "vpopcntb")
+BU_P8V_OVERLOAD_1 (VPOPCNTH, "vpopcnth")
+BU_P8V_OVERLOAD_1 (VPOPCNTW, "vpopcntw")
+BU_P8V_OVERLOAD_1 (VPOPCNTD, "vpopcntd")
+BU_P8V_OVERLOAD_1 (VGBBD, "vgbbd")
+
+/* ISA 2.07 vector overloaded 2 argument functions. */
+BU_P8V_OVERLOAD_2 (EQV, "eqv")
+BU_P8V_OVERLOAD_2 (NAND, "nand")
+BU_P8V_OVERLOAD_2 (ORC, "orc")
+BU_P8V_OVERLOAD_2 (VADDUDM, "vaddudm")
+BU_P8V_OVERLOAD_2 (VMAXSD, "vmaxsd")
+BU_P8V_OVERLOAD_2 (VMAXUD, "vmaxud")
+BU_P8V_OVERLOAD_2 (VMINSD, "vminsd")
+BU_P8V_OVERLOAD_2 (VMINUD, "vminud")
+BU_P8V_OVERLOAD_2 (VMRGEW, "vmrgew")
+BU_P8V_OVERLOAD_2 (VMRGOW, "vmrgow")
+BU_P8V_OVERLOAD_2 (VPKSDSS, "vpksdss")
+BU_P8V_OVERLOAD_2 (VPKSDUS, "vpksdus")
+BU_P8V_OVERLOAD_2 (VPKUDUM, "vpkudum")
+BU_P8V_OVERLOAD_2 (VPKUDUS, "vpkudus")
+BU_P8V_OVERLOAD_2 (VRLD, "vrld")
+BU_P8V_OVERLOAD_2 (VSLD, "vsld")
+BU_P8V_OVERLOAD_2 (VSRAD, "vsrad")
+BU_P8V_OVERLOAD_2 (VSRD, "vsrd")
+BU_P8V_OVERLOAD_2 (VSUBUDM, "vsubudm")
+
+
+/* 1 argument crypto functions. */
+BU_CRYPTO_1 (VSBOX, "vsbox", CONST, crypto_vsbox)
+
+/* 2 argument crypto functions. */
+BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher)
+BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast)
+BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher)
+BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast)
+BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb)
+BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh)
+BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw)
+BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd)
+
+/* 3 argument crypto functions. */
+BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di)
+BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si)
+BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi)
+BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi)
+BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw)
+BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad)
+
+/* 2 argument crypto overloaded functions. */
+BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum")
+
+/* 3 argument crypto overloaded functions. */
+BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor")
+BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma")
+
+
/* 3 argument paired floating point builtins. */
BU_PAIRED_3 (MSUB, "msub", FP, fmsv2sf4)
BU_PAIRED_3 (MADD, "madd", FP, fmav2sf4)
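(Editor's sketch, not part of the patch: together with the altivec_overloaded_builtins entries that rs6000-c.c adds below, the BU_P8V_OVERLOAD_2 definitions above are expected to let the generic <altivec.h> names accept 64-bit element vectors. Assumes vec_add/vec_sub resolve to P8V_BUILTIN_VADDUDM / P8V_BUILTIN_VSUBUDM when compiling with -mcpu=power8.)

/* Illustration only: generic overloads on V2DI mapping to vaddudm/vsubudm.  */
#include <altivec.h>

vector long long
add_sub_demo (vector long long a, vector long long b, vector long long c)
{
  return vec_sub (vec_add (a, b), c);
}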
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index a4f66ba8f1b..593b772ebd1 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -315,6 +315,8 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR6X");
if ((flags & OPTION_MASK_POPCNTD) != 0)
rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR7");
+ if ((flags & OPTION_MASK_DIRECT_MOVE) != 0)
+ rs6000_define_or_undefine_macro (define_p, "_ARCH_PWR8");
if ((flags & OPTION_MASK_SOFT_FLOAT) != 0)
rs6000_define_or_undefine_macro (define_p, "_SOFT_FLOAT");
if ((flags & OPTION_MASK_RECIP_PRECISION) != 0)
@@ -331,6 +333,10 @@ rs6000_target_modify_macros (bool define_p, HOST_WIDE_INT flags,
}
if ((flags & OPTION_MASK_VSX) != 0)
rs6000_define_or_undefine_macro (define_p, "__VSX__");
+ if ((flags & OPTION_MASK_P8_VECTOR) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__POWER8_VECTOR__");
+ if ((flags & OPTION_MASK_CRYPTO) != 0)
+ rs6000_define_or_undefine_macro (define_p, "__CRYPTO__");
/* options from the builtin masks. */
if ((bu_mask & RS6000_BTM_SPE) != 0)
@@ -505,6 +511,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SI,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_ABS, P8V_BUILTIN_ABS_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, ALTIVEC_BUILTIN_ABS_V4SF,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_ABS, VSX_BUILTIN_XVABSDP,
@@ -577,12 +585,24 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKH, ALTIVEC_BUILTIN_VUPKHPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_pixel_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHSH, ALTIVEC_BUILTIN_VUPKHSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_VUPKHSH, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKHPX, ALTIVEC_BUILTIN_VUPKHPX,
@@ -601,6 +621,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_UNPACKL, ALTIVEC_BUILTIN_VUPKLSH,
RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V8HI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { ALTIVEC_BUILTIN_VEC_UNPACKL, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V8HI, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_VUPKLPX, ALTIVEC_BUILTIN_VUPKLPX,
@@ -651,6 +675,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDUWM,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_ADD, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, ALTIVEC_BUILTIN_VADDFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_ADD, VSX_BUILTIN_XVADDDP,
@@ -937,6 +973,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQUW,
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPEQ, P8V_BUILTIN_VCMPEQUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, ALTIVEC_BUILTIN_VCMPEQFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPEQ, VSX_BUILTIN_XVCMPEQDP,
@@ -975,6 +1015,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTSW,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPGT, P8V_BUILTIN_VCMPGTSD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPGT, VSX_BUILTIN_XVCMPGTDP,
@@ -1021,6 +1065,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_bool_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTSW,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTUD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_CMPLT, P8V_BUILTIN_VCMPGTSD,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, ALTIVEC_BUILTIN_VCMPGTFP,
RS6000_BTI_bool_V4SI, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_CMPLT, VSX_BUILTIN_XVCMPGTDP,
@@ -1418,6 +1466,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXSW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MAX, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, ALTIVEC_BUILTIN_VMAXFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_MAX, VSX_BUILTIN_XVMAXDP,
@@ -1604,6 +1664,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINSW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_MIN, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, ALTIVEC_BUILTIN_VMINFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_MIN, VSX_BUILTIN_XVMINDP,
@@ -1786,6 +1858,12 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_PACK, ALTIVEC_BUILTIN_VPKUWUM,
RS6000_BTI_bool_V8HI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACK, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUM, ALTIVEC_BUILTIN_VPKUWUM,
@@ -1812,6 +1890,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUWUS, ALTIVEC_BUILTIN_VPKUWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHSS, ALTIVEC_BUILTIN_VPKSHSS,
RS6000_BTI_V16QI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKUHUS, ALTIVEC_BUILTIN_VPKUHUS,
@@ -1824,6 +1906,8 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_PACKSU, ALTIVEC_BUILTIN_VPKSWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_PACKSU, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSWUS, ALTIVEC_BUILTIN_VPKSWUS,
RS6000_BTI_unsigned_V8HI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VPKSHUS, ALTIVEC_BUILTIN_VPKSHUS,
@@ -1844,6 +1928,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_RL, ALTIVEC_BUILTIN_VRLW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_RL, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VRLW, ALTIVEC_BUILTIN_VRLW,
@@ -1868,6 +1956,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SL, ALTIVEC_BUILTIN_VSLW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SL, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTDP,
RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0, 0 },
{ ALTIVEC_BUILTIN_VEC_SQRT, VSX_BUILTIN_XVSQRTSP,
@@ -2032,6 +2124,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SR, ALTIVEC_BUILTIN_VSRW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SR, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRW, ALTIVEC_BUILTIN_VSRW,
@@ -2056,6 +2152,10 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SRA, ALTIVEC_BUILTIN_VSRAW,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SRA, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_VSRAW, ALTIVEC_BUILTIN_VSRAW,
@@ -2196,6 +2296,18 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBUWM,
RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { ALTIVEC_BUILTIN_VEC_SUB, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, ALTIVEC_BUILTIN_VSUBFP,
RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
{ ALTIVEC_BUILTIN_VEC_SUB, VSX_BUILTIN_XVSUBDP,
@@ -3327,6 +3439,20 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQUW_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPEQ_P, P8V_BUILTIN_VCMPEQUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, ALTIVEC_BUILTIN_VCMPEQFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_VCMPEQ_P, VSX_BUILTIN_XVCMPEQDP_P,
@@ -3372,11 +3498,455 @@ const struct altivec_builtin_types altivec_overloaded_builtins[] = {
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGTSW_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SI, RS6000_BTI_V4SI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTUD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI },
+ { ALTIVEC_BUILTIN_VEC_VCMPGE_P, P8V_BUILTIN_VCMPGTSD_P,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DI, RS6000_BTI_V2DI },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, ALTIVEC_BUILTIN_VCMPGEFP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V4SF, RS6000_BTI_V4SF },
{ ALTIVEC_BUILTIN_VEC_VCMPGE_P, VSX_BUILTIN_XVCMPGEDP_P,
RS6000_BTI_INTSI, RS6000_BTI_INTSI, RS6000_BTI_V2DF, RS6000_BTI_V2DF },
+ /* Power8 vector overloaded functions. */
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_EQV, P8V_BUILTIN_EQV_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_NAND, P8V_BUILTIN_NAND_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_bool_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_bool_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_bool_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_bool_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_bool_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_bool_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V4SF,
+ RS6000_BTI_V4SF, RS6000_BTI_V4SF, RS6000_BTI_V4SF, 0 },
+ { P8V_BUILTIN_VEC_ORC, P8V_BUILTIN_ORC_V2DF,
+ RS6000_BTI_V2DF, RS6000_BTI_V2DF, RS6000_BTI_V2DF, 0 },
+
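For reference, a minimal C usage sketch of the new logical-op overloads above (not part of the patch; it assumes altivec.h maps vec_eqv, vec_nand and vec_orc onto these P8V_BUILTIN_VEC_* entries and that the file is compiled with -mcpu=power8):

#include <altivec.h>

vector unsigned int
combine_masks (vector unsigned int a, vector unsigned int b, vector unsigned int c)
{
  vector unsigned int eq = vec_eqv (a, b);    /* ~(a ^ b)  -> xxleqv  */
  vector unsigned int nd = vec_nand (a, c);   /* ~(a & c)  -> xxlnand */
  return vec_orc (eq, nd);                    /* eq | ~nd  -> xxlorc  */
}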
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VADDUDM, P8V_BUILTIN_VADDUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZ, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZB, P8V_BUILTIN_VCLZB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZH, P8V_BUILTIN_VCLZH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZW, P8V_BUILTIN_VCLZW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VCLZD, P8V_BUILTIN_VCLZD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINSD, P8V_BUILTIN_VMINSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXSD, P8V_BUILTIN_VMAXSD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMINUD, P8V_BUILTIN_VMINUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VMAXUD, P8V_BUILTIN_VMAXUD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGEW, P8V_BUILTIN_VMRGEW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0 },
+ { P8V_BUILTIN_VEC_VMRGOW, P8V_BUILTIN_VMRGOW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNT, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_V16QI, RS6000_BTI_V16QI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTB, P8V_BUILTIN_VPOPCNTB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_V8HI, RS6000_BTI_V8HI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTH, P8V_BUILTIN_VPOPCNTH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_V4SI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTW, P8V_BUILTIN_VPOPCNTW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0, 0 },
+ { P8V_BUILTIN_VEC_VPOPCNTD, P8V_BUILTIN_VPOPCNTD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VPKUDUM, P8V_BUILTIN_VPKUDUM,
+ RS6000_BTI_bool_V4SI, RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDSS, P8V_BUILTIN_VPKSDSS,
+ RS6000_BTI_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKUDUS, P8V_BUILTIN_VPKUDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VPKSDUS, P8V_BUILTIN_VPKSDUS,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VRLD, P8V_BUILTIN_VRLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSLD, P8V_BUILTIN_VSLD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRD, P8V_BUILTIN_VSRD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSRAD, P8V_BUILTIN_VSRAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_V2DI, RS6000_BTI_V2DI, RS6000_BTI_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_bool_V2DI, 0 },
+ { P8V_BUILTIN_VEC_VSUBUDM, P8V_BUILTIN_VSUBUDM,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKHSW, P8V_BUILTIN_VUPKHSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_V2DI, RS6000_BTI_V4SI, 0, 0 },
+ { P8V_BUILTIN_VEC_VUPKLSW, P8V_BUILTIN_VUPKLSW,
+ RS6000_BTI_bool_V2DI, RS6000_BTI_bool_V4SI, 0, 0 },
+
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_V16QI, 0, 0, 0 },
+ { P8V_BUILTIN_VEC_VGBBD, P8V_BUILTIN_VGBBD,
+ RS6000_BTI_unsigned_V16QI, 0, 0, 0 },
+
+ /* Crypto builtins. */
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI },
+ { CRYPTO_BUILTIN_VPERMXOR, CRYPTO_BUILTIN_VPERMXOR_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI },
+
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMB,
+ RS6000_BTI_unsigned_V16QI, RS6000_BTI_unsigned_V16QI,
+ RS6000_BTI_unsigned_V16QI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMH,
+ RS6000_BTI_unsigned_V8HI, RS6000_BTI_unsigned_V8HI,
+ RS6000_BTI_unsigned_V8HI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_unsigned_V4SI, 0 },
+ { CRYPTO_BUILTIN_VPMSUM, CRYPTO_BUILTIN_VPMSUMD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_unsigned_V2DI, 0 },
+
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAW,
+ RS6000_BTI_unsigned_V4SI, RS6000_BTI_unsigned_V4SI,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+ { CRYPTO_BUILTIN_VSHASIGMA, CRYPTO_BUILTIN_VSHASIGMAD,
+ RS6000_BTI_unsigned_V2DI, RS6000_BTI_unsigned_V2DI,
+ RS6000_BTI_INTSI, RS6000_BTI_INTSI },
+
{ (enum rs6000_builtins) 0, (enum rs6000_builtins) 0, 0, 0, 0, 0 }
};
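A hedged usage sketch for the crypto overloads registered above (builtin names assumed from this patch series; all operands are unsigned vectors per the table, and the last two vshasigma operands must be integer constants, see the range checks added to rs6000_expand_ternop_builtin further down):

#include <altivec.h>

vector unsigned long long
crypto_demo (vector unsigned long long a, vector unsigned long long b,
             vector unsigned long long sel)
{
  vector unsigned long long px = __builtin_crypto_vpermxor (a, b, sel);
  vector unsigned long long pm = __builtin_crypto_vpmsumd (a, b);
  /* Second argument must be 0 or 1, third must be in the range 0..15.  */
  vector unsigned long long sg = __builtin_crypto_vshasigmad (px, 1, 0xf);
  return pm ^ sg;
}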
@@ -3650,11 +4220,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
TREE_USED (decl) = 1;
TREE_TYPE (decl) = arg1_type;
TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
- DECL_INITIAL (decl) = arg1;
- stmt = build1 (DECL_EXPR, arg1_type, decl);
- TREE_ADDRESSABLE (decl) = 1;
- SET_EXPR_LOCATION (stmt, loc);
- stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ if (c_dialect_cxx ())
+ {
+ stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+ NULL_TREE, NULL_TREE);
+ SET_EXPR_LOCATION (stmt, loc);
+ }
+ else
+ {
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ }
innerptrtype = build_pointer_type (arg1_inner_type);
@@ -3729,11 +4308,20 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
TREE_USED (decl) = 1;
TREE_TYPE (decl) = arg1_type;
TREE_READONLY (decl) = TYPE_READONLY (arg1_type);
- DECL_INITIAL (decl) = arg1;
- stmt = build1 (DECL_EXPR, arg1_type, decl);
- TREE_ADDRESSABLE (decl) = 1;
- SET_EXPR_LOCATION (stmt, loc);
- stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ if (c_dialect_cxx ())
+ {
+ stmt = build4 (TARGET_EXPR, arg1_type, decl, arg1,
+ NULL_TREE, NULL_TREE);
+ SET_EXPR_LOCATION (stmt, loc);
+ }
+ else
+ {
+ DECL_INITIAL (decl) = arg1;
+ stmt = build1 (DECL_EXPR, arg1_type, decl);
+ TREE_ADDRESSABLE (decl) = 1;
+ SET_EXPR_LOCATION (stmt, loc);
+ stmt = build1 (COMPOUND_LITERAL_EXPR, arg1_type, stmt);
+ }
innerptrtype = build_pointer_type (arg1_inner_type);
@@ -3824,7 +4412,8 @@ altivec_resolve_overloaded_builtin (location_t loc, tree fndecl,
&& (desc->op2 == RS6000_BTI_NOT_OPAQUE
|| rs6000_builtin_type_compatible (types[1], desc->op2))
&& (desc->op3 == RS6000_BTI_NOT_OPAQUE
- || rs6000_builtin_type_compatible (types[2], desc->op3)))
+ || rs6000_builtin_type_compatible (types[2], desc->op3))
+ && rs6000_builtin_decls[desc->overloaded_code] != NULL_TREE)
return altivec_build_resolved_builtin (args, n, desc);
bad:
diff --git a/gcc/config/rs6000/rs6000-cpus.def b/gcc/config/rs6000/rs6000-cpus.def
index 0564018b3f0..08346b61d17 100644
--- a/gcc/config/rs6000/rs6000-cpus.def
+++ b/gcc/config/rs6000/rs6000-cpus.def
@@ -28,7 +28,7 @@
ALTIVEC, since in general it isn't a win on power6. In ISA 2.04, fsel,
fre, fsqrt, etc. were no longer documented as optional. Group masks by
server and embedded. */
-#define ISA_2_5_MASKS_EMBEDDED (ISA_2_2_MASKS \
+#define ISA_2_5_MASKS_EMBEDDED (ISA_2_4_MASKS \
| OPTION_MASK_CMPB \
| OPTION_MASK_RECIP_PRECISION \
| OPTION_MASK_PPC_GFXOPT \
@@ -45,6 +45,14 @@
| OPTION_MASK_VSX \
| OPTION_MASK_VSX_TIMODE)
+/* For now, don't provide an embedded version of ISA 2.07. */
+#define ISA_2_7_MASKS_SERVER (ISA_2_6_MASKS_SERVER \
+ | OPTION_MASK_P8_FUSION \
+ | OPTION_MASK_P8_VECTOR \
+ | OPTION_MASK_CRYPTO \
+ | OPTION_MASK_DIRECT_MOVE \
+ | OPTION_MASK_QUAD_MEMORY)
+
#define POWERPC_7400_MASK (OPTION_MASK_PPC_GFXOPT | OPTION_MASK_ALTIVEC)
/* Deal with ports that do not have -mstrict-align. */
@@ -61,7 +69,9 @@
/* Mask of all options to set the default isa flags based on -mcpu=<xxx>. */
#define POWERPC_MASKS (OPTION_MASK_ALTIVEC \
| OPTION_MASK_CMPB \
+ | OPTION_MASK_CRYPTO \
| OPTION_MASK_DFP \
+ | OPTION_MASK_DIRECT_MOVE \
| OPTION_MASK_DLMZB \
| OPTION_MASK_FPRND \
| OPTION_MASK_ISEL \
@@ -69,11 +79,14 @@
| OPTION_MASK_MFPGPR \
| OPTION_MASK_MULHW \
| OPTION_MASK_NO_UPDATE \
+ | OPTION_MASK_P8_FUSION \
+ | OPTION_MASK_P8_VECTOR \
| OPTION_MASK_POPCNTB \
| OPTION_MASK_POPCNTD \
| OPTION_MASK_POWERPC64 \
| OPTION_MASK_PPC_GFXOPT \
| OPTION_MASK_PPC_GPOPT \
+ | OPTION_MASK_QUAD_MEMORY \
| OPTION_MASK_RECIP_PRECISION \
| OPTION_MASK_SOFT_FLOAT \
| OPTION_MASK_STRICT_ALIGN_OPTIONAL \
@@ -168,10 +181,7 @@ RS6000_CPU ("power7", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
| MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
| MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE)
-RS6000_CPU ("power8", PROCESSOR_POWER7, /* Don't add MASK_ISEL by default */
- POWERPC_7400_MASK | MASK_POWERPC64 | MASK_PPC_GPOPT | MASK_MFCRF
- | MASK_POPCNTB | MASK_FPRND | MASK_CMPB | MASK_DFP | MASK_POPCNTD
- | MASK_VSX | MASK_RECIP_PRECISION | MASK_VSX_TIMODE)
+RS6000_CPU ("power8", PROCESSOR_POWER8, MASK_POWERPC64 | ISA_2_7_MASKS_SERVER)
RS6000_CPU ("powerpc", PROCESSOR_POWERPC, 0)
RS6000_CPU ("powerpc64", PROCESSOR_POWERPC64, MASK_PPC_GFXOPT | MASK_POWERPC64)
RS6000_CPU ("rs64", PROCESSOR_RS64A, MASK_PPC_GFXOPT | MASK_POWERPC64)
diff --git a/gcc/config/rs6000/rs6000-opts.h b/gcc/config/rs6000/rs6000-opts.h
index fc843fd19ca..d528a4fd87a 100644
--- a/gcc/config/rs6000/rs6000-opts.h
+++ b/gcc/config/rs6000/rs6000-opts.h
@@ -30,21 +30,22 @@
/* Processor type. Order must match cpu attribute in MD file. */
enum processor_type
{
- PROCESSOR_RS64A,
- PROCESSOR_MPCCORE,
- PROCESSOR_PPC403,
- PROCESSOR_PPC405,
- PROCESSOR_PPC440,
- PROCESSOR_PPC476,
PROCESSOR_PPC601,
PROCESSOR_PPC603,
PROCESSOR_PPC604,
PROCESSOR_PPC604e,
PROCESSOR_PPC620,
PROCESSOR_PPC630,
+
PROCESSOR_PPC750,
PROCESSOR_PPC7400,
PROCESSOR_PPC7450,
+
+ PROCESSOR_PPC403,
+ PROCESSOR_PPC405,
+ PROCESSOR_PPC440,
+ PROCESSOR_PPC476,
+
PROCESSOR_PPC8540,
PROCESSOR_PPC8548,
PROCESSOR_PPCE300C2,
@@ -53,15 +54,21 @@ enum processor_type
PROCESSOR_PPCE500MC64,
PROCESSOR_PPCE5500,
PROCESSOR_PPCE6500,
+
PROCESSOR_POWER4,
PROCESSOR_POWER5,
PROCESSOR_POWER6,
PROCESSOR_POWER7,
+ PROCESSOR_POWER8,
+
+ PROCESSOR_RS64A,
+ PROCESSOR_MPCCORE,
PROCESSOR_CELL,
PROCESSOR_PPCA2,
PROCESSOR_TITAN
};
+
/* FP processor type. */
enum fpu_type_t
{
@@ -131,11 +138,14 @@ enum rs6000_cmodel {
CMODEL_LARGE
};
-/* Describe which vector unit to use for a given machine mode. */
+/* Describe which vector unit to use for a given machine mode. The
+ VECTOR_MEM_* and VECTOR_UNIT_* macros assume that Altivec, VSX, and
+ P8_VECTOR are contiguous. */
enum rs6000_vector {
VECTOR_NONE, /* Type is not a vector or not supported */
VECTOR_ALTIVEC, /* Use altivec for vector processing */
VECTOR_VSX, /* Use VSX for vector processing */
+ VECTOR_P8_VECTOR, /* Use ISA 2.07 VSX for vector processing */
VECTOR_PAIRED, /* Use paired floating point for vectors */
VECTOR_SPE, /* Use SPE for vector processing */
VECTOR_OTHER /* Some other vector unit */
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index d9bcf1a41ed..25bad1bfb68 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -50,6 +50,7 @@ extern rtx rs6000_got_register (rtx);
extern rtx find_addr_reg (rtx);
extern rtx gen_easy_altivec_constant (rtx);
extern const char *output_vec_const_move (rtx *);
+extern const char *rs6000_output_move_128bit (rtx *);
extern void rs6000_expand_vector_init (rtx, rtx);
extern void paired_expand_vector_init (rtx, rtx);
extern void rs6000_expand_vector_set (rtx, rtx, int);
@@ -70,6 +71,8 @@ extern int insvdi_rshift_rlwimi_p (rtx, rtx, rtx);
extern int registers_ok_for_quad_peep (rtx, rtx);
extern int mems_ok_for_quad_peep (rtx, rtx);
extern bool gpr_or_gpr_p (rtx, rtx);
+extern bool direct_move_p (rtx, rtx);
+extern bool quad_load_store_p (rtx, rtx);
extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx,
enum reg_class);
extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class,
@@ -138,6 +141,7 @@ extern int rs6000_loop_align (rtx);
#endif /* RTX_CODE */
#ifdef TREE_CODE
+extern unsigned int rs6000_data_alignment (tree, unsigned int, enum data_align);
extern unsigned int rs6000_special_round_type_align (tree, unsigned int,
unsigned int);
extern unsigned int darwin_rs6000_special_round_type_align (tree, unsigned int,
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index e82b24e22ce..2331c5029c2 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -292,6 +292,39 @@ typedef rtx (*gen_2arg_fn_t) (rtx, rtx, rtx);
don't link in rs6000-c.c, so we can't call it directly. */
void (*rs6000_target_modify_macros_ptr) (bool, HOST_WIDE_INT, HOST_WIDE_INT);
+/* Simplify register classes into simpler classifications. We assume
+ GPR_REG_TYPE - FPR_REG_TYPE are ordered so that we can use a simple range
+ check for standard register classes (gpr/floating/altivec/vsx) and
+ floating/vector classes (float/altivec/vsx). */
+
+enum rs6000_reg_type {
+ NO_REG_TYPE,
+ PSEUDO_REG_TYPE,
+ GPR_REG_TYPE,
+ VSX_REG_TYPE,
+ ALTIVEC_REG_TYPE,
+ FPR_REG_TYPE,
+ SPR_REG_TYPE,
+ CR_REG_TYPE,
+ SPE_ACC_TYPE,
+ SPEFSCR_REG_TYPE
+};
+
+/* Map register class to register type. */
+static enum rs6000_reg_type reg_class_to_reg_type[N_REG_CLASSES];
+
+/* First/last register type for the 'normal' register types (i.e. general
+ purpose, floating point, altivec, and VSX registers). */
+#define IS_STD_REG_TYPE(RTYPE) IN_RANGE(RTYPE, GPR_REG_TYPE, FPR_REG_TYPE)
+
+#define IS_FP_VECT_REG_TYPE(RTYPE) IN_RANGE(RTYPE, VSX_REG_TYPE, FPR_REG_TYPE)
+
+/* Direct moves to/from vsx/gpr registers that need an additional register to
+ do the move. */
+static enum insn_code reload_fpr_gpr[NUM_MACHINE_MODES];
+static enum insn_code reload_gpr_vsx[NUM_MACHINE_MODES];
+static enum insn_code reload_vsx_gpr[NUM_MACHINE_MODES];
+
/* Target cpu costs. */
@@ -831,6 +864,25 @@ struct processor_costs power7_cost = {
12, /* prefetch streams */
};
+/* Instruction costs on POWER8 processors. */
+static const
+struct processor_costs power8_cost = {
+ COSTS_N_INSNS (3), /* mulsi */
+ COSTS_N_INSNS (3), /* mulsi_const */
+ COSTS_N_INSNS (3), /* mulsi_const9 */
+ COSTS_N_INSNS (3), /* muldi */
+ COSTS_N_INSNS (19), /* divsi */
+ COSTS_N_INSNS (35), /* divdi */
+ COSTS_N_INSNS (3), /* fp */
+ COSTS_N_INSNS (3), /* dmul */
+ COSTS_N_INSNS (14), /* sdiv */
+ COSTS_N_INSNS (17), /* ddiv */
+ 128, /* cache line size */
+ 32, /* l1 cache */
+ 256, /* l2 cache */
+ 12, /* prefetch streams */
+};
+
/* Instruction costs on POWER A2 processors. */
static const
struct processor_costs ppca2_cost = {
@@ -1023,6 +1075,13 @@ static void rs6000_print_isa_options (FILE *, int, const char *,
static void rs6000_print_builtin_options (FILE *, int, const char *,
HOST_WIDE_INT);
+static enum rs6000_reg_type register_to_reg_type (rtx, bool *);
+static bool rs6000_secondary_reload_move (enum rs6000_reg_type,
+ enum rs6000_reg_type,
+ enum machine_mode,
+ secondary_reload_info *,
+ bool);
+
/* Hash table stuff for keeping track of TOC entries. */
struct GTY(()) toc_hash_struct
@@ -1547,6 +1606,15 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
{
int last_regno = regno + rs6000_hard_regno_nregs[mode][regno] - 1;
+ /* PTImode can only go in GPRs. Quad word memory operations require even/odd
+ register combinations, and use PTImode where we need to deal with quad
+ word memory operations. Don't allow quad words in the argument or frame
+ pointer registers, just registers 0..31. */
+ if (mode == PTImode)
+ return (IN_RANGE (regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && IN_RANGE (last_regno, FIRST_GPR_REGNO, LAST_GPR_REGNO)
+ && ((regno & 1) == 0));
+
/* VSX registers that overlap the FPR registers are larger than for non-VSX
implementations. Don't allow an item to be split between a FP register
and an Altivec register. */
@@ -1559,8 +1627,7 @@ rs6000_hard_regno_mode_ok (int regno, enum machine_mode mode)
return ALTIVEC_REGNO_P (last_regno);
}
- /* Allow TImode in all VSX registers if the user asked for it. Note, PTImode
- can only go in GPRs. */
+ /* Allow TImode in all VSX registers if the user asked for it. */
if (mode == TImode && TARGET_VSX_TIMODE && VSX_REGNO_P (regno))
return 1;
@@ -1678,6 +1745,16 @@ rs6000_debug_reg_print (int first_regno, int last_regno, const char *reg_name)
comma = "";
}
+ len += fprintf (stderr, "%sreg-class = %s", comma,
+ reg_class_names[(int)rs6000_regno_regclass[r]]);
+ comma = ", ";
+
+ if (len > 70)
+ {
+ fprintf (stderr, ",\n\t");
+ comma = "";
+ }
+
fprintf (stderr, "%sregno = %d\n", comma, r);
}
}
@@ -1710,6 +1787,7 @@ rs6000_debug_reg_global (void)
"none",
"altivec",
"vsx",
+ "p8_vector",
"paired",
"spe",
"other"
@@ -1802,8 +1880,11 @@ rs6000_debug_reg_global (void)
"wf reg_class = %s\n"
"wg reg_class = %s\n"
"wl reg_class = %s\n"
+ "wm reg_class = %s\n"
+ "wr reg_class = %s\n"
"ws reg_class = %s\n"
"wt reg_class = %s\n"
+ "wv reg_class = %s\n"
"wx reg_class = %s\n"
"wz reg_class = %s\n"
"\n",
@@ -1815,8 +1896,11 @@ rs6000_debug_reg_global (void)
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wf]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wg]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wl]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wm]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wr]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_ws]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wt]],
+ reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wv]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wx]],
reg_class_names[rs6000_constraints[RS6000_CONSTRAINT_wz]]);
@@ -2050,6 +2134,10 @@ rs6000_debug_reg_global (void)
if (targetm.lra_p ())
fprintf (stderr, DEBUG_FMT_S, "lra", "true");
+ if (TARGET_P8_FUSION)
+ fprintf (stderr, DEBUG_FMT_S, "p8 fusion",
+ (TARGET_P8_FUSION_SIGN) ? "zero+sign" : "zero");
+
fprintf (stderr, DEBUG_FMT_S, "plt-format",
TARGET_SECURE_PLT ? "secure" : "bss");
fprintf (stderr, DEBUG_FMT_S, "struct-return",
@@ -2105,6 +2193,36 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_regno_regclass[ARG_POINTER_REGNUM] = BASE_REGS;
rs6000_regno_regclass[FRAME_POINTER_REGNUM] = BASE_REGS;
+ /* Precalculate register class to simpler reload register class. We don't
+ need all of the register classes that are combinations of different
+ classes, just the simple ones that have constraint letters. */
+ for (c = 0; c < N_REG_CLASSES; c++)
+ reg_class_to_reg_type[c] = NO_REG_TYPE;
+
+ reg_class_to_reg_type[(int)GENERAL_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)BASE_REGS] = GPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSX_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)VRSAVE_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)VSCR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)LINK_OR_CTR_REGS] = SPR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)CR0_REGS] = CR_REG_TYPE;
+ reg_class_to_reg_type[(int)SPE_ACC_REGS] = SPE_ACC_TYPE;
+ reg_class_to_reg_type[(int)SPEFSCR_REGS] = SPEFSCR_REG_TYPE;
+
+ if (TARGET_VSX)
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = VSX_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = VSX_REG_TYPE;
+ }
+ else
+ {
+ reg_class_to_reg_type[(int)FLOAT_REGS] = FPR_REG_TYPE;
+ reg_class_to_reg_type[(int)ALTIVEC_REGS] = ALTIVEC_REG_TYPE;
+ }
+
/* Precalculate vector information, this must be set up before the
rs6000_hard_regno_nregs_internal below. */
for (m = 0; m < NUM_MACHINE_MODES; ++m)
@@ -2177,12 +2295,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
}
}
- /* V2DImode, only allow under VSX, which can do V2DI insert/splat/extract.
- Altivec doesn't have 64-bit support. */
+ /* V2DImode: full support depends on the ISA 2.07 vector instructions. Allow
+ it under VSX for insert/splat/extract; Altivec lacks 64-bit integer support. */
if (TARGET_VSX)
{
rs6000_vector_mem[V2DImode] = VECTOR_VSX;
- rs6000_vector_unit[V2DImode] = VECTOR_NONE;
+ rs6000_vector_unit[V2DImode]
+ = (TARGET_P8_VECTOR) ? VECTOR_P8_VECTOR : VECTOR_NONE;
rs6000_vector_align[V2DImode] = align64;
}
@@ -2240,13 +2359,30 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
if (TARGET_LFIWAX)
rs6000_constraints[RS6000_CONSTRAINT_wl] = FLOAT_REGS;
+ if (TARGET_DIRECT_MOVE)
+ rs6000_constraints[RS6000_CONSTRAINT_wm] = VSX_REGS;
+
+ if (TARGET_POWERPC64)
+ rs6000_constraints[RS6000_CONSTRAINT_wr] = GENERAL_REGS;
+
+ if (TARGET_P8_VECTOR)
+ rs6000_constraints[RS6000_CONSTRAINT_wv] = ALTIVEC_REGS;
+
if (TARGET_STFIWX)
rs6000_constraints[RS6000_CONSTRAINT_wx] = FLOAT_REGS;
if (TARGET_LFIWZX)
rs6000_constraints[RS6000_CONSTRAINT_wz] = FLOAT_REGS;
- /* Set up the reload helper functions. */
+ /* Set up the direct move combinations. */
+ for (m = 0; m < NUM_MACHINE_MODES; ++m)
+ {
+ reload_fpr_gpr[m] = CODE_FOR_nothing;
+ reload_gpr_vsx[m] = CODE_FOR_nothing;
+ reload_vsx_gpr[m] = CODE_FOR_nothing;
+ }
+
+ /* Set up the reload helper and direct move functions. */
if (TARGET_VSX || TARGET_ALTIVEC)
{
if (TARGET_64BIT)
@@ -2270,11 +2406,47 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_di_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_di_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_di_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_di_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_di_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_di_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_di_store;
rs6000_vector_reload[TImode][1] = CODE_FOR_reload_ti_di_load;
}
+ if (TARGET_DIRECT_MOVE)
+ {
+ if (TARGET_POWERPC64)
+ {
+ reload_gpr_vsx[TImode] = CODE_FOR_reload_gpr_from_vsxti;
+ reload_gpr_vsx[V2DFmode] = CODE_FOR_reload_gpr_from_vsxv2df;
+ reload_gpr_vsx[V2DImode] = CODE_FOR_reload_gpr_from_vsxv2di;
+ reload_gpr_vsx[V4SFmode] = CODE_FOR_reload_gpr_from_vsxv4sf;
+ reload_gpr_vsx[V4SImode] = CODE_FOR_reload_gpr_from_vsxv4si;
+ reload_gpr_vsx[V8HImode] = CODE_FOR_reload_gpr_from_vsxv8hi;
+ reload_gpr_vsx[V16QImode] = CODE_FOR_reload_gpr_from_vsxv16qi;
+ reload_gpr_vsx[SFmode] = CODE_FOR_reload_gpr_from_vsxsf;
+
+ reload_vsx_gpr[TImode] = CODE_FOR_reload_vsx_from_gprti;
+ reload_vsx_gpr[V2DFmode] = CODE_FOR_reload_vsx_from_gprv2df;
+ reload_vsx_gpr[V2DImode] = CODE_FOR_reload_vsx_from_gprv2di;
+ reload_vsx_gpr[V4SFmode] = CODE_FOR_reload_vsx_from_gprv4sf;
+ reload_vsx_gpr[V4SImode] = CODE_FOR_reload_vsx_from_gprv4si;
+ reload_vsx_gpr[V8HImode] = CODE_FOR_reload_vsx_from_gprv8hi;
+ reload_vsx_gpr[V16QImode] = CODE_FOR_reload_vsx_from_gprv16qi;
+ reload_vsx_gpr[SFmode] = CODE_FOR_reload_vsx_from_gprsf;
+ }
+ else
+ {
+ reload_fpr_gpr[DImode] = CODE_FOR_reload_fpr_from_gprdi;
+ reload_fpr_gpr[DDmode] = CODE_FOR_reload_fpr_from_gprdd;
+ reload_fpr_gpr[DFmode] = CODE_FOR_reload_fpr_from_gprdf;
+ }
+ }
}
else
{
@@ -2297,6 +2469,13 @@ rs6000_init_hard_regno_mode_ok (bool global_init_p)
rs6000_vector_reload[DDmode][0] = CODE_FOR_reload_dd_si_store;
rs6000_vector_reload[DDmode][1] = CODE_FOR_reload_dd_si_load;
}
+ if (TARGET_P8_VECTOR)
+ {
+ rs6000_vector_reload[SFmode][0] = CODE_FOR_reload_sf_si_store;
+ rs6000_vector_reload[SFmode][1] = CODE_FOR_reload_sf_si_load;
+ rs6000_vector_reload[SDmode][0] = CODE_FOR_reload_sd_si_store;
+ rs6000_vector_reload[SDmode][1] = CODE_FOR_reload_sd_si_load;
+ }
if (TARGET_VSX_TIMODE)
{
rs6000_vector_reload[TImode][0] = CODE_FOR_reload_ti_si_store;
@@ -2520,16 +2699,18 @@ darwin_rs6000_override_options (void)
HOST_WIDE_INT
rs6000_builtin_mask_calculate (void)
{
- return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
- | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
- | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
- | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
- | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
- | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
- | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
- | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
- | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
- | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0));
+ return (((TARGET_ALTIVEC) ? RS6000_BTM_ALTIVEC : 0)
+ | ((TARGET_VSX) ? RS6000_BTM_VSX : 0)
+ | ((TARGET_SPE) ? RS6000_BTM_SPE : 0)
+ | ((TARGET_PAIRED_FLOAT) ? RS6000_BTM_PAIRED : 0)
+ | ((TARGET_FRE) ? RS6000_BTM_FRE : 0)
+ | ((TARGET_FRES) ? RS6000_BTM_FRES : 0)
+ | ((TARGET_FRSQRTE) ? RS6000_BTM_FRSQRTE : 0)
+ | ((TARGET_FRSQRTES) ? RS6000_BTM_FRSQRTES : 0)
+ | ((TARGET_POPCNTD) ? RS6000_BTM_POPCNTD : 0)
+ | ((rs6000_cpu == PROCESSOR_CELL) ? RS6000_BTM_CELL : 0)
+ | ((TARGET_P8_VECTOR) ? RS6000_BTM_P8_VECTOR : 0)
+ | ((TARGET_CRYPTO) ? RS6000_BTM_CRYPTO : 0));
}
/* Override command line options. Mostly we process the processor type and
@@ -2803,7 +2984,9 @@ rs6000_option_override_internal (bool global_init_p)
/* For the newer switches (vsx, dfp, etc.) set some of the older options,
unless the user explicitly used the -mno-<option> to disable the code. */
- if (TARGET_VSX)
+ if (TARGET_P8_VECTOR || TARGET_DIRECT_MOVE || TARGET_CRYPTO)
+ rs6000_isa_flags |= (ISA_2_7_MASKS_SERVER & ~rs6000_isa_flags_explicit);
+ else if (TARGET_VSX)
rs6000_isa_flags |= (ISA_2_6_MASKS_SERVER & ~rs6000_isa_flags_explicit);
else if (TARGET_POPCNTD)
rs6000_isa_flags |= (ISA_2_6_MASKS_EMBEDDED & ~rs6000_isa_flags_explicit);
@@ -2818,6 +3001,34 @@ rs6000_option_override_internal (bool global_init_p)
else if (TARGET_ALTIVEC)
rs6000_isa_flags |= (OPTION_MASK_PPC_GFXOPT & ~rs6000_isa_flags_explicit);
+ if (TARGET_CRYPTO && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_CRYPTO)
+ error ("-mcrypto requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_CRYPTO;
+ }
+
+ if (TARGET_DIRECT_MOVE && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_DIRECT_MOVE)
+ error ("-mdirect-move requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_DIRECT_MOVE;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_ALTIVEC)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -maltivec");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
+ if (TARGET_P8_VECTOR && !TARGET_VSX)
+ {
+ if (rs6000_isa_flags_explicit & OPTION_MASK_P8_VECTOR)
+ error ("-mpower8-vector requires -mvsx");
+ rs6000_isa_flags &= ~OPTION_MASK_P8_VECTOR;
+ }
+
if (TARGET_VSX_TIMODE && !TARGET_VSX)
{
if (rs6000_isa_flags_explicit & OPTION_MASK_VSX_TIMODE)
@@ -2825,6 +3036,16 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_isa_flags &= ~OPTION_MASK_VSX_TIMODE;
}
+ /* The quad memory instructions only work in 64-bit mode. In 32-bit mode,
+ silently turn off quad memory mode. */
+ if (TARGET_QUAD_MEMORY && !TARGET_POWERPC64)
+ {
+ if ((rs6000_isa_flags_explicit & OPTION_MASK_QUAD_MEMORY) != 0)
+ warning (0, N_("-mquad-memory requires 64-bit mode"));
+
+ rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY;
+ }
+
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags);
@@ -2951,7 +3172,8 @@ rs6000_option_override_internal (bool global_init_p)
/* Place FP constants in the constant pool instead of TOC
if section anchors enabled. */
- if (flag_section_anchors)
+ if (flag_section_anchors
+ && !global_options_set.x_TARGET_NO_FP_IN_TOC)
TARGET_NO_FP_IN_TOC = 1;
if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET)
@@ -3019,16 +3241,19 @@ rs6000_option_override_internal (bool global_init_p)
&& rs6000_cpu != PROCESSOR_POWER5
&& rs6000_cpu != PROCESSOR_POWER6
&& rs6000_cpu != PROCESSOR_POWER7
+ && rs6000_cpu != PROCESSOR_POWER8
&& rs6000_cpu != PROCESSOR_PPCA2
&& rs6000_cpu != PROCESSOR_CELL
&& rs6000_cpu != PROCESSOR_PPC476);
rs6000_sched_groups = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
- || rs6000_cpu == PROCESSOR_POWER7);
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8);
rs6000_align_branch_targets = (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
|| rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8
|| rs6000_cpu == PROCESSOR_PPCE500MC
|| rs6000_cpu == PROCESSOR_PPCE500MC64
|| rs6000_cpu == PROCESSOR_PPCE5500
@@ -3272,6 +3497,10 @@ rs6000_option_override_internal (bool global_init_p)
rs6000_cost = &power7_cost;
break;
+ case PROCESSOR_POWER8:
+ rs6000_cost = &power8_cost;
+ break;
+
case PROCESSOR_PPCA2:
rs6000_cost = &ppca2_cost;
break;
@@ -3444,7 +3673,8 @@ rs6000_loop_align (rtx label)
&& (rs6000_cpu == PROCESSOR_POWER4
|| rs6000_cpu == PROCESSOR_POWER5
|| rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7))
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8))
return 5;
else
return align_loops_log;
@@ -3983,6 +4213,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
enum built_in_function fn = DECL_FUNCTION_CODE (fndecl);
switch (fn)
{
+ case BUILT_IN_CLZIMAX:
+ case BUILT_IN_CLZLL:
+ case BUILT_IN_CLZL:
+ case BUILT_IN_CLZ:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VCLZD];
+ }
+ break;
case BUILT_IN_COPYSIGN:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -3998,6 +4244,22 @@ rs6000_builtin_vectorized_function (tree fndecl, tree type_out,
if (VECTOR_UNIT_ALTIVEC_P (V4SFmode))
return rs6000_builtin_decls[ALTIVEC_BUILTIN_COPYSIGN_V4SF];
break;
+ case BUILT_IN_POPCOUNTIMAX:
+ case BUILT_IN_POPCOUNTLL:
+ case BUILT_IN_POPCOUNTL:
+ case BUILT_IN_POPCOUNT:
+ if (TARGET_P8_VECTOR && in_mode == out_mode && out_n == in_n)
+ {
+ if (out_mode == QImode && out_n == 16)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTB];
+ else if (out_mode == HImode && out_n == 8)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTH];
+ else if (out_mode == SImode && out_n == 4)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTW];
+ else if (out_mode == DImode && out_n == 2)
+ return rs6000_builtin_decls[P8V_BUILTIN_VPOPCNTD];
+ }
+ break;
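A small sketch of the kind of loop the two new cases above let the vectorizer handle (assumes -O3 -ftree-vectorize -mcpu=power8; the SImode cases map to vpopcntw and vclzw):

void
bit_counts (int *restrict pop, int *restrict lz,
            const unsigned int *restrict src, int n)
{
  int i;
  for (i = 0; i < n; i++)
    {
      pop[i] = __builtin_popcount (src[i]);
      lz[i] = __builtin_clz (src[i] | 1);   /* avoid clz(0), which is undefined */
    }
}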
case BUILT_IN_SQRT:
if (VECTOR_UNIT_VSX_P (V2DFmode)
&& out_mode == DFmode && out_n == 2
@@ -4395,7 +4657,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
bitsize = GET_MODE_BITSIZE (inner);
mask = GET_MODE_MASK (inner);
- val = const_vector_elt_as_int (op, nunits - 1);
+ val = const_vector_elt_as_int (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
splat_val = val;
msb_val = val > 0 ? 0 : -1;
@@ -4435,7 +4697,7 @@ vspltis_constant (rtx op, unsigned step, unsigned copies)
for (i = 0; i < nunits - 1; ++i)
{
HOST_WIDE_INT desired_val;
- if (((i + 1) & (step - 1)) == 0)
+ if (((BYTES_BIG_ENDIAN ? i + 1 : i) & (step - 1)) == 0)
desired_val = val;
else
desired_val = msb_val;
@@ -4520,13 +4782,13 @@ gen_easy_altivec_constant (rtx op)
{
enum machine_mode mode = GET_MODE (op);
int nunits = GET_MODE_NUNITS (mode);
- rtx last = CONST_VECTOR_ELT (op, nunits - 1);
+ rtx val = CONST_VECTOR_ELT (op, BYTES_BIG_ENDIAN ? nunits - 1 : 0);
unsigned step = nunits / 4;
unsigned copies = 1;
/* Start with a vspltisw. */
if (vspltis_constant (op, step, copies))
- return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, last));
+ return gen_rtx_VEC_DUPLICATE (V4SImode, gen_lowpart (SImode, val));
/* Then try with a vspltish. */
if (step == 1)
@@ -4535,7 +4797,7 @@ gen_easy_altivec_constant (rtx op)
step >>= 1;
if (vspltis_constant (op, step, copies))
- return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, last));
+ return gen_rtx_VEC_DUPLICATE (V8HImode, gen_lowpart (HImode, val));
/* And finally a vspltisb. */
if (step == 1)
@@ -4544,7 +4806,7 @@ gen_easy_altivec_constant (rtx op)
step >>= 1;
if (vspltis_constant (op, step, copies))
- return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, last));
+ return gen_rtx_VEC_DUPLICATE (V16QImode, gen_lowpart (QImode, val));
gcc_unreachable ();
}
@@ -4856,8 +5118,11 @@ rs6000_expand_vector_init (rtx target, rtx vals)
{
rtx freg = gen_reg_rtx (V4SFmode);
rtx sreg = force_reg (SFmode, XVECEXP (vals, 0, 0));
+ rtx cvt = ((TARGET_XSCVDPSPN)
+ ? gen_vsx_xscvdpspn_scalar (freg, sreg)
+ : gen_vsx_xscvdpsp_scalar (freg, sreg));
- emit_insn (gen_vsx_xscvdpsp_scalar (freg, sreg));
+ emit_insn (cvt);
emit_insn (gen_vsx_xxspltw_v4sf (target, freg, const0_rtx));
}
else
@@ -5119,6 +5384,48 @@ invalid_e500_subreg (rtx op, enum machine_mode mode)
return false;
}
+/* Return alignment of TYPE. Existing alignment is ALIGN. HOW
+ selects whether the alignment is ABI mandated, optional, or
+ both ABI and optional alignment. */
+
+unsigned int
+rs6000_data_alignment (tree type, unsigned int align, enum data_align how)
+{
+ if (how != align_opt)
+ {
+ if (TREE_CODE (type) == VECTOR_TYPE)
+ {
+ if ((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (type)))
+ || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (type))))
+ {
+ if (align < 64)
+ align = 64;
+ }
+ else if (align < 128)
+ align = 128;
+ }
+ else if (TARGET_E500_DOUBLE
+ && TREE_CODE (type) == REAL_TYPE
+ && TYPE_MODE (type) == DFmode)
+ {
+ if (align < 64)
+ align = 64;
+ }
+ }
+
+ if (how != align_abi)
+ {
+ if (TREE_CODE (type) == ARRAY_TYPE
+ && TYPE_MODE (TREE_TYPE (type)) == QImode)
+ {
+ if (align < BITS_PER_WORD)
+ align = BITS_PER_WORD;
+ }
+ }
+
+ return align;
+}
+
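Sketch of the intended effect of rs6000_data_alignment on a few declarations (an illustration, not code from the patch; assumes altivec.h and a vector-capable target):

#include <altivec.h>

vector int vec_data;   /* ABI-mandated: alignment raised to 128 bits when how != align_opt.  */
char log_buf[256];     /* QImode array: optional alignment raised to BITS_PER_WORD.  */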
/* AIX increases natural record alignment to doubleword if the first
field is an FP double while the FP fields remain word aligned. */
@@ -5240,6 +5547,72 @@ gpr_or_gpr_p (rtx op0, rtx op1)
|| (REG_P (op1) && INT_REGNO_P (REGNO (op1))));
}
+/* Return true if this is a move direct operation between GPR registers and
+ floating point/VSX registers. */
+
+bool
+direct_move_p (rtx op0, rtx op1)
+{
+ int regno0, regno1;
+
+ if (!REG_P (op0) || !REG_P (op1))
+ return false;
+
+ if (!TARGET_DIRECT_MOVE && !TARGET_MFPGPR)
+ return false;
+
+ regno0 = REGNO (op0);
+ regno1 = REGNO (op1);
+ if (regno0 >= FIRST_PSEUDO_REGISTER || regno1 >= FIRST_PSEUDO_REGISTER)
+ return false;
+
+ if (INT_REGNO_P (regno0))
+ return (TARGET_DIRECT_MOVE) ? VSX_REGNO_P (regno1) : FP_REGNO_P (regno1);
+
+ else if (INT_REGNO_P (regno1))
+ {
+ if (TARGET_MFPGPR && FP_REGNO_P (regno0))
+ return true;
+
+ else if (TARGET_DIRECT_MOVE && VSX_REGNO_P (regno0))
+ return true;
+ }
+
+ return false;
+}
+
+/* Return true if this is a load or store quad operation. */
+
+bool
+quad_load_store_p (rtx op0, rtx op1)
+{
+ bool ret;
+
+ if (!TARGET_QUAD_MEMORY)
+ ret = false;
+
+ else if (REG_P (op0) && MEM_P (op1))
+ ret = (quad_int_reg_operand (op0, GET_MODE (op0))
+ && quad_memory_operand (op1, GET_MODE (op1))
+ && !reg_overlap_mentioned_p (op0, op1));
+
+ else if (MEM_P (op0) && REG_P (op1))
+ ret = (quad_memory_operand (op0, GET_MODE (op0))
+ && quad_int_reg_operand (op1, GET_MODE (op1)));
+
+ else
+ ret = false;
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n========== quad_load_store, return %s\n",
+ ret ? "true" : "false");
+ debug_rtx (gen_rtx_SET (VOIDmode, op0, op1));
+ }
+
+ return ret;
+}
+
/* Given an address, return a constant offset term if one exists. */
static rtx
@@ -5375,91 +5748,102 @@ virtual_stack_registers_memory_p (rtx op)
&& regnum <= LAST_VIRTUAL_POINTER_REGISTER);
}
-/* Return true if memory accesses to OP are known to never straddle
- a 32k boundary. */
+/* Return true if a MODE-sized memory access to OP plus OFFSET
+ is known not to straddle a 32k boundary. */
static bool
offsettable_ok_by_alignment (rtx op, HOST_WIDE_INT offset,
enum machine_mode mode)
{
tree decl, type;
- unsigned HOST_WIDE_INT dsize, dalign;
+ unsigned HOST_WIDE_INT dsize, dalign, lsb, mask;
if (GET_CODE (op) != SYMBOL_REF)
return false;
+ dsize = GET_MODE_SIZE (mode);
decl = SYMBOL_REF_DECL (op);
if (!decl)
{
- if (GET_MODE_SIZE (mode) == 0)
+ if (dsize == 0)
return false;
/* -fsection-anchors loses the original SYMBOL_REF_DECL when
replacing memory addresses with an anchor plus offset. We
could find the decl by rummaging around in the block->objects
VEC for the given offset but that seems like too much work. */
- dalign = 1;
+ dalign = BITS_PER_UNIT;
if (SYMBOL_REF_HAS_BLOCK_INFO_P (op)
&& SYMBOL_REF_ANCHOR_P (op)
&& SYMBOL_REF_BLOCK (op) != NULL)
{
struct object_block *block = SYMBOL_REF_BLOCK (op);
- HOST_WIDE_INT lsb, mask;
- /* Given the alignment of the block.. */
dalign = block->alignment;
- mask = dalign / BITS_PER_UNIT - 1;
-
- /* ..and the combined offset of the anchor and any offset
- to this block object.. */
offset += SYMBOL_REF_BLOCK_OFFSET (op);
- lsb = offset & -offset;
+ }
+ else if (CONSTANT_POOL_ADDRESS_P (op))
+ {
+ /* It would be nice to have get_pool_align().. */
+ enum machine_mode cmode = get_pool_mode (op);
- /* ..find how many bits of the alignment we know for the
- object. */
- mask &= lsb - 1;
- dalign = mask + 1;
+ dalign = GET_MODE_ALIGNMENT (cmode);
}
- return dalign >= GET_MODE_SIZE (mode);
}
-
- if (DECL_P (decl))
+ else if (DECL_P (decl))
{
- if (TREE_CODE (decl) == FUNCTION_DECL)
- return true;
+ dalign = DECL_ALIGN (decl);
- if (!DECL_SIZE_UNIT (decl))
- return false;
+ if (dsize == 0)
+ {
+ /* Allow BLKmode when the entire object is known to not
+ cross a 32k boundary. */
+ if (!DECL_SIZE_UNIT (decl))
+ return false;
- if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
- return false;
+ if (!host_integerp (DECL_SIZE_UNIT (decl), 1))
+ return false;
- dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
- if (dsize > 32768)
- return false;
+ dsize = tree_low_cst (DECL_SIZE_UNIT (decl), 1);
+ if (dsize > 32768)
+ return false;
- dalign = DECL_ALIGN_UNIT (decl);
- return dalign >= dsize;
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
}
+ else
+ {
+ type = TREE_TYPE (decl);
- type = TREE_TYPE (decl);
+ dalign = TYPE_ALIGN (type);
+ if (CONSTANT_CLASS_P (decl))
+ dalign = CONSTANT_ALIGNMENT (decl, dalign);
+ else
+ dalign = DATA_ALIGNMENT (decl, dalign);
- if (TREE_CODE (decl) == STRING_CST)
- dsize = TREE_STRING_LENGTH (decl);
- else if (TYPE_SIZE_UNIT (type)
- && host_integerp (TYPE_SIZE_UNIT (type), 1))
- dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
- else
- return false;
- if (dsize > 32768)
- return false;
+ if (dsize == 0)
+ {
+ /* BLKmode, check the entire object. */
+ if (TREE_CODE (decl) == STRING_CST)
+ dsize = TREE_STRING_LENGTH (decl);
+ else if (TYPE_SIZE_UNIT (type)
+ && host_integerp (TYPE_SIZE_UNIT (type), 1))
+ dsize = tree_low_cst (TYPE_SIZE_UNIT (type), 1);
+ else
+ return false;
+ if (dsize > 32768)
+ return false;
+
+ return dalign / BITS_PER_UNIT >= dsize;
+ }
+ }
+
+ /* Find how many bits of the alignment we know for this access. */
+ mask = dalign / BITS_PER_UNIT - 1;
+ lsb = offset & -offset;
+ mask &= lsb - 1;
+ dalign = mask + 1;
- dalign = TYPE_ALIGN (type);
- if (CONSTANT_CLASS_P (decl))
- dalign = CONSTANT_ALIGNMENT (decl, dalign);
- else
- dalign = DATA_ALIGNMENT (decl, dalign);
- dalign /= BITS_PER_UNIT;
return dalign >= dsize;
}
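Worked example of the alignment arithmetic at the end of offsettable_ok_by_alignment (standalone sketch, assuming BITS_PER_UNIT is 8): with a 16-byte-aligned base and offset 40, lsb = 40 & -40 = 8 and mask = 15 & 7 = 7, so only 8-byte alignment is known for the access, and dalign >= dsize holds for accesses of at most 8 bytes.

static unsigned long
known_byte_alignment (unsigned long dalign_bits, unsigned long offset)
{
  unsigned long mask = dalign_bits / 8 - 1;   /* base alignment in bytes, minus 1 */
  unsigned long lsb = offset & -offset;       /* lowest set bit of the offset */
  mask &= lsb - 1;                            /* alignment bits still guaranteed */
  return mask + 1;                            /* known alignment of base + offset */
}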
@@ -5747,8 +6131,11 @@ rs6000_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
if (GET_CODE (x) == PLUS && XEXP (x, 1) == const0_rtx)
return force_reg (Pmode, XEXP (x, 0));
+ /* For TImode with load/store quad, restrict addresses to just a single
+ pointer, so it works with both GPRs and VSX registers. */
/* Make sure both operands are registers. */
- else if (GET_CODE (x) == PLUS)
+ else if (GET_CODE (x) == PLUS
+ && (mode != TImode || !TARGET_QUAD_MEMORY))
return gen_rtx_PLUS (Pmode,
force_reg (Pmode, XEXP (x, 0)),
force_reg (Pmode, XEXP (x, 1)));
@@ -6405,7 +6792,6 @@ use_toc_relative_ref (rtx sym)
&& ASM_OUTPUT_SPECIAL_POOL_ENTRY_P (get_pool_constant (sym),
get_pool_mode (sym)))
|| (TARGET_CMODEL == CMODEL_MEDIUM
- && !CONSTANT_POOL_ADDRESS_P (sym)
&& SYMBOL_REF_LOCAL_P (sym)));
}
@@ -6703,6 +7089,13 @@ rs6000_legitimate_address_p (enum machine_mode mode, rtx x, bool reg_ok_strict)
if (reg_offset_p
&& legitimate_constant_pool_address_p (x, mode, reg_ok_strict))
return 1;
+ /* For TImode, if we have load/store quad, only allow register indirect
+ addresses. This will allow the values to go in either GPRs or VSX
+ registers without reloading. The vector types would tend to go into VSX
+ registers, so we allow REG+REG, while TImode seems somewhat split, in that
+ some uses are GPR based, and some VSX based. */
+ if (mode == TImode && TARGET_QUAD_MEMORY)
+ return 0;
/* If not REG_OK_STRICT (before reload) let pass any stack offset. */
if (! reg_ok_strict
&& reg_offset_p
@@ -9215,20 +9608,17 @@ setup_incoming_varargs (cumulative_args_t cum, enum machine_mode mode,
if (! no_rtl && first_reg_offset < GP_ARG_NUM_REG
&& cfun->va_list_gpr_size)
{
- int nregs = GP_ARG_NUM_REG - first_reg_offset;
+ int n_gpr, nregs = GP_ARG_NUM_REG - first_reg_offset;
if (va_list_gpr_counter_field)
- {
- /* V4 va_list_gpr_size counts number of registers needed. */
- if (nregs > cfun->va_list_gpr_size)
- nregs = cfun->va_list_gpr_size;
- }
+ /* V4 va_list_gpr_size counts number of registers needed. */
+ n_gpr = cfun->va_list_gpr_size;
else
- {
- /* char * va_list instead counts number of bytes needed. */
- if (nregs > cfun->va_list_gpr_size / reg_size)
- nregs = cfun->va_list_gpr_size / reg_size;
- }
+ /* char * va_list instead counts number of bytes needed. */
+ n_gpr = (cfun->va_list_gpr_size + reg_size - 1) / reg_size;
+
+ if (nregs > n_gpr)
+ nregs = n_gpr;
mem = gen_rtx_MEM (BLKmode,
plus_constant (Pmode, save_area,
@@ -10578,6 +10968,27 @@ rs6000_expand_ternop_builtin (enum insn_code icode, tree exp, rtx target)
return const0_rtx;
}
}
+ else if (icode == CODE_FOR_crypto_vshasigmaw
+ || icode == CODE_FOR_crypto_vshasigmad)
+ {
+ /* Check that the 2nd and 3rd arguments are integer constants and that
+ they are in range. */
+ STRIP_NOPS (arg1);
+ if (TREE_CODE (arg1) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg1), 0, 1))
+ {
+ error ("argument 2 must be 0 or 1");
+ return const0_rtx;
+ }
+
+ STRIP_NOPS (arg2);
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !IN_RANGE (TREE_INT_CST_LOW (arg2), 0, 15))
+ {
+ error ("argument 3 must be in the range 0..15");
+ return const0_rtx;
+ }
+ }
if (target == 0
|| GET_MODE (target) != tmode
@@ -12268,6 +12679,10 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V4SI_type_node,
V4SI_type_node, NULL_TREE);
+ tree int_ftype_int_v2di_v2di
+ = build_function_type_list (integer_type_node,
+ integer_type_node, V2DI_type_node,
+ V2DI_type_node, NULL_TREE);
tree void_ftype_v4si
= build_function_type_list (void_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_void
@@ -12350,6 +12765,8 @@ altivec_init_builtins (void)
= build_function_type_list (integer_type_node,
integer_type_node, V2DF_type_node,
V2DF_type_node, NULL_TREE);
+ tree v2di_ftype_v2di
+ = build_function_type_list (V2DI_type_node, V2DI_type_node, NULL_TREE);
tree v4si_ftype_v4si
= build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
tree v8hi_ftype_v8hi
@@ -12485,6 +12902,9 @@ altivec_init_builtins (void)
case VOIDmode:
type = int_ftype_int_opaque_opaque;
break;
+ case V2DImode:
+ type = int_ftype_int_v2di_v2di;
+ break;
case V4SImode:
type = int_ftype_int_v4si_v4si;
break;
@@ -12518,6 +12938,9 @@ altivec_init_builtins (void)
switch (mode0)
{
+ case V2DImode:
+ type = v2di_ftype_v2di;
+ break;
case V4SImode:
type = v4si_ftype_v4si;
break;
@@ -12723,11 +13146,27 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
are type correct. */
switch (builtin)
{
+ /* unsigned 1 argument functions. */
+ case CRYPTO_BUILTIN_VSBOX:
+ case P8V_BUILTIN_VGBBD:
+ h.uns_p[0] = 1;
+ h.uns_p[1] = 1;
+ break;
+
/* unsigned 2 argument functions. */
case ALTIVEC_BUILTIN_VMULEUB_UNS:
case ALTIVEC_BUILTIN_VMULEUH_UNS:
case ALTIVEC_BUILTIN_VMULOUB_UNS:
case ALTIVEC_BUILTIN_VMULOUH_UNS:
+ case CRYPTO_BUILTIN_VCIPHER:
+ case CRYPTO_BUILTIN_VCIPHERLAST:
+ case CRYPTO_BUILTIN_VNCIPHER:
+ case CRYPTO_BUILTIN_VNCIPHERLAST:
+ case CRYPTO_BUILTIN_VPMSUMB:
+ case CRYPTO_BUILTIN_VPMSUMH:
+ case CRYPTO_BUILTIN_VPMSUMW:
+ case CRYPTO_BUILTIN_VPMSUMD:
+ case CRYPTO_BUILTIN_VPMSUM:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12750,6 +13189,14 @@ builtin_function_type (enum machine_mode mode_ret, enum machine_mode mode_arg0,
case VSX_BUILTIN_XXSEL_8HI_UNS:
case VSX_BUILTIN_XXSEL_4SI_UNS:
case VSX_BUILTIN_XXSEL_2DI_UNS:
+ case CRYPTO_BUILTIN_VPERMXOR:
+ case CRYPTO_BUILTIN_VPERMXOR_V2DI:
+ case CRYPTO_BUILTIN_VPERMXOR_V4SI:
+ case CRYPTO_BUILTIN_VPERMXOR_V8HI:
+ case CRYPTO_BUILTIN_VPERMXOR_V16QI:
+ case CRYPTO_BUILTIN_VSHASIGMAW:
+ case CRYPTO_BUILTIN_VSHASIGMAD:
+ case CRYPTO_BUILTIN_VSHASIGMA:
h.uns_p[0] = 1;
h.uns_p[1] = 1;
h.uns_p[2] = 1;
@@ -12891,8 +13338,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
- continue;
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_3arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip ternary %s (no code)\n",
+ d->name);
+
+ continue;
+ }
type = builtin_function_type (insn_data[icode].operand[0].mode,
insn_data[icode].operand[1].mode,
@@ -12931,8 +13393,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
- continue;
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_2arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip binary %s (no code)\n",
+ d->name);
+
+ continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -12993,8 +13470,23 @@ rs6000_common_init_builtins (void)
else
{
enum insn_code icode = d->icode;
- if (d->name == 0 || icode == CODE_FOR_nothing)
- continue;
+ if (d->name == 0)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, bdesc_1arg[%ld] no name\n",
+ (long unsigned)i);
+
+ continue;
+ }
+
+ if (icode == CODE_FOR_nothing)
+ {
+ if (TARGET_DEBUG_BUILTIN)
+ fprintf (stderr, "rs6000_builtin, skip unary %s (no code)\n",
+ d->name);
+
+ continue;
+ }
mode0 = insn_data[icode].operand[0].mode;
mode1 = insn_data[icode].operand[1].mode;
@@ -13747,29 +14239,226 @@ rs6000_check_sdmode (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
return NULL_TREE;
}
-enum reload_reg_type {
- GPR_REGISTER_TYPE,
- VECTOR_REGISTER_TYPE,
- OTHER_REGISTER_TYPE
-};
+/* Classify a register type. Because the FMRGOW/FMRGEW instructions only work
+ on traditional floating point registers, and the VMRGOW/VMRGEW instructions
+ only work on the traditional altivec registers, note if an altivec register
+ was chosen. */
-static enum reload_reg_type
-rs6000_reload_register_type (enum reg_class rclass)
+static enum rs6000_reg_type
+register_to_reg_type (rtx reg, bool *is_altivec)
{
- switch (rclass)
+ HOST_WIDE_INT regno;
+ enum reg_class rclass;
+
+ if (GET_CODE (reg) == SUBREG)
+ reg = SUBREG_REG (reg);
+
+ if (!REG_P (reg))
+ return NO_REG_TYPE;
+
+ regno = REGNO (reg);
+ if (regno >= FIRST_PSEUDO_REGISTER)
{
- case GENERAL_REGS:
- case BASE_REGS:
- return GPR_REGISTER_TYPE;
+ if (!lra_in_progress && !reload_in_progress && !reload_completed)
+ return PSEUDO_REG_TYPE;
- case FLOAT_REGS:
- case ALTIVEC_REGS:
- case VSX_REGS:
- return VECTOR_REGISTER_TYPE;
+ regno = true_regnum (reg);
+ if (regno < 0 || regno >= FIRST_PSEUDO_REGISTER)
+ return PSEUDO_REG_TYPE;
+ }
- default:
- return OTHER_REGISTER_TYPE;
+ gcc_assert (regno >= 0);
+
+ if (is_altivec && ALTIVEC_REGNO_P (regno))
+ *is_altivec = true;
+
+ rclass = rs6000_regno_regclass[regno];
+ return reg_class_to_reg_type[(int)rclass];
+}
+
+/* Helper function for rs6000_secondary_reload to return true if a move to a
+ different register class is really a simple move. */
+
+static bool
+rs6000_secondary_reload_simple_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode)
+{
+ int size;
+
+ /* Add support for various direct moves available. In this function, we only
+ look at cases where we don't need any extra registers, and one or more
+ simple move insns are issued. At present, 32-bit integers are not allowed
+ in FPR/VSX registers. Single precision binary floating point is not a simple
+ move because we need to convert to the single precision memory layout.
+ The 4-byte SDmode can be moved. */
+ size = GET_MODE_SIZE (mode);
+ if (TARGET_DIRECT_MOVE
+ && ((mode == SDmode) || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ || (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if (TARGET_MFPGPR && TARGET_POWERPC64 && size == 8
+ && ((to_type == GPR_REG_TYPE && from_type == FPR_REG_TYPE)
+ || (to_type == FPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ else if ((size == 4 || (TARGET_POWERPC64 && size == 8))
+ && ((to_type == GPR_REG_TYPE && from_type == SPR_REG_TYPE)
+ || (to_type == SPR_REG_TYPE && from_type == GPR_REG_TYPE)))
+ return true;
+
+ return false;
+}
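+
+/* For example, on power8 in 64-bit mode a DImode value moves between a GPR
+   and a VSX register with a single mtvsrd/mfvsrd, which is why the function
+   above treats that case as a simple move needing no extra registers.  */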
+
+/* Power8 helper function for rs6000_secondary_reload, handle all of the
+ special direct moves that involve allocating an extra register. Set the
+ insn code and extra cost in SRI and return true if such a helper exists,
+ or return false if not. */
+
+static bool
+rs6000_secondary_reload_direct_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ bool ret = false;
+ enum insn_code icode = CODE_FOR_nothing;
+ int cost = 0;
+ int size = GET_MODE_SIZE (mode);
+
+ if (TARGET_POWERPC64)
+ {
+ if (size == 16)
+ {
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+ }
+
+ else if (mode == SFmode)
+ {
+ if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* xscvdpspn, mfvsrd, and. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+
+ else if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 2; /* mtvsrz, xscvspdpn. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+ }
}
+
+ if (TARGET_POWERPC64 && size == 16)
+ {
+ /* Handle moving 128-bit values from GPRs to VSX point registers on
+ power8 when running in 64-bit mode using XXPERMDI to glue the two
+ 64-bit values back together. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE)
+ {
+ cost = 3; /* 2 mtvsrd's, 1 xxpermdi. */
+ icode = reload_vsx_gpr[(int)mode];
+ }
+
+ /* Handle moving 128-bit values from VSX point registers to GPRs on
+ power8 when running in 64-bit mode using XXPERMDI to get access to the
+ bottom 64-bit value. */
+ else if (to_type == GPR_REG_TYPE && from_type == VSX_REG_TYPE)
+ {
+ cost = 3; /* 2 mfvsrd's, 1 xxpermdi. */
+ icode = reload_gpr_vsx[(int)mode];
+ }
+ }
+
+ else if (!TARGET_POWERPC64 && size == 8)
+ {
+ /* Handle moving 64-bit values from GPRs to floating point registers on
+ power8 when running in 32-bit mode using FMRGOW to glue the two 32-bit
+ values back together. Altivec register classes must be handled
+ specially since a different instruction is used, and the secondary
+ reload support requires a single instruction class in the scratch
+ register constraint. However, right now TFmode is not allowed in
+ Altivec registers, so the pattern will never match. */
+ if (to_type == VSX_REG_TYPE && from_type == GPR_REG_TYPE && !altivec_p)
+ {
+ cost = 3; /* 2 mtvsrwz's, 1 fmrgow. */
+ icode = reload_fpr_gpr[(int)mode];
+ }
+ }
+
+ if (icode != CODE_FOR_nothing)
+ {
+ ret = true;
+ if (sri)
+ {
+ sri->icode = icode;
+ sri->extra_cost = cost;
+ }
+ }
+
+ return ret;
+}
+
+/* Return whether a move between two register classes can be done either
+ directly (simple move) or via a pattern that uses a single extra temporary
+ (using power8's direct move in this case). */
+
+static bool
+rs6000_secondary_reload_move (enum rs6000_reg_type to_type,
+ enum rs6000_reg_type from_type,
+ enum machine_mode mode,
+ secondary_reload_info *sri,
+ bool altivec_p)
+{
+ /* Fall back to load/store reloads if either type is not a register. */
+ if (to_type == NO_REG_TYPE || from_type == NO_REG_TYPE)
+ return false;
+
+ /* If we haven't allocated registers yet, assume the move can be done for the
+ standard register types. */
+ if ((to_type == PSEUDO_REG_TYPE && from_type == PSEUDO_REG_TYPE)
+ || (to_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (from_type))
+ || (from_type == PSEUDO_REG_TYPE && IS_STD_REG_TYPE (to_type)))
+ return true;
+
+ /* A move within the same set of registers is a simple move for non-specialized
+ registers. */
+ if (to_type == from_type && IS_STD_REG_TYPE (to_type))
+ return true;
+
+ /* Check whether a simple move can be done directly. */
+ if (rs6000_secondary_reload_simple_move (to_type, from_type, mode))
+ {
+ if (sri)
+ {
+ sri->icode = CODE_FOR_nothing;
+ sri->extra_cost = 0;
+ }
+ return true;
+ }
+
+ /* Now check if we can do it in a few steps. */
+ return rs6000_secondary_reload_direct_move (to_type, from_type, mode, sri,
+ altivec_p);
}
/* Inform reload about cases where moving X with a mode MODE to a register in
@@ -13795,11 +14484,32 @@ rs6000_secondary_reload (bool in_p,
bool default_p = false;
sri->icode = CODE_FOR_nothing;
-
- /* Convert vector loads and stores into gprs to use an additional base
- register. */
icode = rs6000_vector_reload[mode][in_p != false];
- if (icode != CODE_FOR_nothing)
+
+ if (REG_P (x) || register_operand (x, mode))
+ {
+ enum rs6000_reg_type to_type = reg_class_to_reg_type[(int)rclass];
+ bool altivec_p = (rclass == ALTIVEC_REGS);
+ enum rs6000_reg_type from_type = register_to_reg_type (x, &altivec_p);
+
+ if (!in_p)
+ {
+ enum rs6000_reg_type exchange = to_type;
+ to_type = from_type;
+ from_type = exchange;
+ }
+
+ if (rs6000_secondary_reload_move (to_type, from_type, mode, sri,
+ altivec_p))
+ {
+ icode = (enum insn_code)sri->icode;
+ default_p = false;
+ ret = NO_REGS;
+ }
+ }
+
+ /* Handle vector moves with reload helper functions. */
+ if (ret == ALL_REGS && icode != CODE_FOR_nothing)
{
ret = NO_REGS;
sri->icode = CODE_FOR_nothing;
@@ -13811,12 +14521,21 @@ rs6000_secondary_reload (bool in_p,
/* Loads to and stores from gprs can do reg+offset, and wouldn't need
an extra register in that case, but it would need an extra
- register if the addressing is reg+reg or (reg+reg)&(-16). */
+ register if the addressing is reg+reg or (reg+reg)&(-16). Special
+ case load/store quad. */
if (rclass == GENERAL_REGS || rclass == BASE_REGS)
{
- if (!legitimate_indirect_address_p (addr, false)
- && !rs6000_legitimate_offset_address_p (PTImode, addr,
- false, true))
+ if (TARGET_POWERPC64 && TARGET_QUAD_MEMORY
+ && GET_MODE_SIZE (mode) == 16
+ && quad_memory_operand (x, mode))
+ {
+ sri->icode = icode;
+ sri->extra_cost = 2;
+ }
+
+ else if (!legitimate_indirect_address_p (addr, false)
+ && !rs6000_legitimate_offset_address_p (PTImode, addr,
+ false, true))
{
sri->icode = icode;
/* account for splitting the loads, and converting the
@@ -13830,7 +14549,7 @@ rs6000_secondary_reload (bool in_p,
else if ((rclass == FLOAT_REGS || rclass == NO_REGS)
&& (GET_MODE_SIZE (mode) == 4 || GET_MODE_SIZE (mode) == 8)
&& (legitimate_indirect_address_p (addr, false)
- || legitimate_indirect_address_p (XEXP (addr, 0), false)
+ || legitimate_indirect_address_p (addr, false)
|| rs6000_legitimate_offset_address_p (mode, addr,
false, true)))
@@ -13882,12 +14601,12 @@ rs6000_secondary_reload (bool in_p,
else
{
enum reg_class xclass = REGNO_REG_CLASS (regno);
- enum reload_reg_type rtype1 = rs6000_reload_register_type (rclass);
- enum reload_reg_type rtype2 = rs6000_reload_register_type (xclass);
+ enum rs6000_reg_type rtype1 = reg_class_to_reg_type[(int)rclass];
+ enum rs6000_reg_type rtype2 = reg_class_to_reg_type[(int)xclass];
/* If memory is needed, use default_secondary_reload to create the
stack slot. */
- if (rtype1 != rtype2 || rtype1 == OTHER_REGISTER_TYPE)
+ if (rtype1 != rtype2 || !IS_STD_REG_TYPE (rtype1))
default_p = true;
else
ret = NO_REGS;
@@ -13897,7 +14616,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) >= UNITS_PER_WORD)
{
@@ -13936,7 +14655,7 @@ rs6000_secondary_reload (bool in_p,
default_p = true;
}
else if (!TARGET_POWERPC64
- && rs6000_reload_register_type (rclass) == GPR_REGISTER_TYPE
+ && reg_class_to_reg_type[(int)rclass] == GPR_REG_TYPE
&& MEM_P (x)
&& GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
{
@@ -14499,42 +15218,25 @@ rs6000_debug_preferred_reload_class (rtx x, enum reg_class rclass)
set and vice versa. */
static bool
-rs6000_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- if (class1 == class2)
- return false;
-
- /* Under VSX, there are 3 register classes that values could be in (VSX_REGS,
- ALTIVEC_REGS, and FLOAT_REGS). We don't need to use memory to copy
- between these classes. But we need memory for other things that can go in
- FLOAT_REGS like SFmode. */
- if (TARGET_VSX
- && (VECTOR_MEM_VSX_P (mode) || VECTOR_UNIT_VSX_P (mode))
- && (class1 == VSX_REGS || class1 == ALTIVEC_REGS
- || class1 == FLOAT_REGS))
- return (class2 != VSX_REGS && class2 != ALTIVEC_REGS
- && class2 != FLOAT_REGS);
+ enum rs6000_reg_type from_type, to_type;
+ bool altivec_p = ((from_class == ALTIVEC_REGS)
+ || (to_class == ALTIVEC_REGS));
- if (class1 == VSX_REGS || class2 == VSX_REGS)
- return true;
+ /* If a simple/direct move is available, we don't need secondary memory. */
+ from_type = reg_class_to_reg_type[(int)from_class];
+ to_type = reg_class_to_reg_type[(int)to_class];
- if (class1 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
-
- if (class2 == FLOAT_REGS
- && (!TARGET_MFPGPR || !TARGET_POWERPC64
- || ((mode != DFmode)
- && (mode != DDmode)
- && (mode != DImode))))
- return true;
+ if (rs6000_secondary_reload_move (to_type, from_type, mode,
+ (secondary_reload_info *)0, altivec_p))
+ return false;
- if (class1 == ALTIVEC_REGS || class2 == ALTIVEC_REGS)
+ /* If we have a floating point or vector register class, we need to use
+ memory to transfer the data. */
+ if (IS_FP_VECT_REG_TYPE (from_type) || IS_FP_VECT_REG_TYPE (to_type))
return true;
return false;
@@ -14542,17 +15244,19 @@ rs6000_secondary_memory_needed (enum reg_class class1,
/* Debug version of rs6000_secondary_memory_needed. */
static bool
-rs6000_debug_secondary_memory_needed (enum reg_class class1,
- enum reg_class class2,
+rs6000_debug_secondary_memory_needed (enum reg_class from_class,
+ enum reg_class to_class,
enum machine_mode mode)
{
- bool ret = rs6000_secondary_memory_needed (class1, class2, mode);
+ bool ret = rs6000_secondary_memory_needed (from_class, to_class, mode);
fprintf (stderr,
- "rs6000_secondary_memory_needed, return: %s, class1 = %s, "
- "class2 = %s, mode = %s\n",
- ret ? "true" : "false", reg_class_names[class1],
- reg_class_names[class2], GET_MODE_NAME (mode));
+ "rs6000_secondary_memory_needed, return: %s, from_class = %s, "
+ "to_class = %s, mode = %s\n",
+ ret ? "true" : "false",
+ reg_class_names[from_class],
+ reg_class_names[to_class],
+ GET_MODE_NAME (mode));
return ret;
}
@@ -14758,6 +15462,170 @@ rs6000_debug_cannot_change_mode_class (enum machine_mode from,
return ret;
}
+/* Return a string to do a move operation of 128 bits of data. */
+
+const char *
+rs6000_output_move_128bit (rtx operands[])
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ enum machine_mode mode = GET_MODE (dest);
+ int dest_regno;
+ int src_regno;
+ bool dest_gpr_p, dest_fp_p, dest_av_p, dest_vsx_p;
+ bool src_gpr_p, src_fp_p, src_av_p, src_vsx_p;
+
+ if (REG_P (dest))
+ {
+ dest_regno = REGNO (dest);
+ dest_gpr_p = INT_REGNO_P (dest_regno);
+ dest_fp_p = FP_REGNO_P (dest_regno);
+ dest_av_p = ALTIVEC_REGNO_P (dest_regno);
+ dest_vsx_p = dest_fp_p | dest_av_p;
+ }
+ else
+ {
+ dest_regno = -1;
+ dest_gpr_p = dest_fp_p = dest_av_p = dest_vsx_p = false;
+ }
+
+ if (REG_P (src))
+ {
+ src_regno = REGNO (src);
+ src_gpr_p = INT_REGNO_P (src_regno);
+ src_fp_p = FP_REGNO_P (src_regno);
+ src_av_p = ALTIVEC_REGNO_P (src_regno);
+ src_vsx_p = src_fp_p | src_av_p;
+ }
+ else
+ {
+ src_regno = -1;
+ src_gpr_p = src_fp_p = src_av_p = src_vsx_p = false;
+ }
+
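+ /* In the cases below, returning "#" tells final that no single instruction
+ exists for the move; the insn is left to be broken up by the matching
+ splitters later.  */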
+ /* Register moves. */
+ if (dest_regno >= 0 && src_regno >= 0)
+ {
+ if (dest_gpr_p)
+ {
+ if (src_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && TARGET_DIRECT_MOVE && src_vsx_p)
+ return "#";
+ }
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (src_vsx_p)
+ return "xxlor %x0,%x1,%x1";
+
+ else if (TARGET_DIRECT_MOVE && src_gpr_p)
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p && src_av_p)
+ return "vor %0,%1,%1";
+
+ else if (dest_fp_p && src_fp_p)
+ return "#";
+ }
+
+ /* Loads. */
+ else if (dest_regno >= 0 && MEM_P (src))
+ {
+ if (dest_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (dest_regno & 1) == 0
+ && quad_memory_operand (src, mode)
+ && !reg_overlap_mentioned_p (dest, src))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (src, 0)) ? "lq %0,%1" : "lq %0,%y1";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "lvx %0,%y1";
+
+ else if (TARGET_VSX && dest_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "lxvw4x %x0,%y1";
+ else
+ return "lxvd2x %x0,%y1";
+ }
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return "lvx %0,%y1";
+
+ else if (dest_fp_p)
+ return "#";
+ }
+
+ /* Stores. */
+ else if (src_regno >= 0 && MEM_P (dest))
+ {
+ if (src_gpr_p)
+ {
+ if (TARGET_QUAD_MEMORY && (src_regno & 1) == 0
+ && quad_memory_operand (dest, mode))
+ {
+ /* lq/stq only has DQ-form, so avoid X-form that %y produces. */
+ return REG_P (XEXP (dest, 0)) ? "stq %1,%0" : "stq %1,%y0";
+ }
+ else
+ return "#";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p
+ && altivec_indexed_or_indirect_operand (src, mode))
+ return "stvx %1,%y0";
+
+ else if (TARGET_VSX && src_vsx_p)
+ {
+ if (mode == V16QImode || mode == V8HImode || mode == V4SImode)
+ return "stxvw4x %x1,%y0";
+ else
+ return "stxvd2x %x1,%y0";
+ }
+
+ else if (TARGET_ALTIVEC && src_av_p)
+ return "stvx %1,%y0";
+
+ else if (src_fp_p)
+ return "#";
+ }
+
+ /* Constants. */
+ else if (dest_regno >= 0
+ && (GET_CODE (src) == CONST_INT
+ || GET_CODE (src) == CONST_DOUBLE
+ || GET_CODE (src) == CONST_VECTOR))
+ {
+ if (dest_gpr_p)
+ return "#";
+
+ else if (TARGET_VSX && dest_vsx_p && zero_constant (src, mode))
+ return "xxlxor %x0,%x0,%x0";
+
+ else if (TARGET_ALTIVEC && dest_av_p)
+ return output_vec_const_move (operands);
+ }
+
+ if (TARGET_DEBUG_ADDR)
+ {
+ fprintf (stderr, "\n===== Bad 128 bit move:\n");
+ debug_rtx (gen_rtx_SET (VOIDmode, dest, src));
+ }
+
+ gcc_unreachable ();
+}
+
+
/* Given a comparison operation, return the bit number in CCR to test. We
know this is a valid comparison.
@@ -15474,11 +16342,6 @@ print_operand (FILE *file, rtx x, int code)
TOCs and the like. */
gcc_assert (GET_CODE (x) == SYMBOL_REF);
- /* Mark the decl as referenced so that cgraph will output the
- function. */
- if (SYMBOL_REF_DECL (x))
- mark_decl_referenced (SYMBOL_REF_DECL (x));
-
/* For macho, check to see if we need a stub. */
if (TARGET_MACHO)
{
@@ -15887,16 +16750,41 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
{
rtx cmp, or_result, compare_result2;
enum machine_mode op_mode = GET_MODE (op0);
+ bool reverse_p;
if (op_mode == VOIDmode)
op_mode = GET_MODE (op1);
+ /* First reverse the condition codes that aren't directly supported. */
+ switch (code)
+ {
+ case NE:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ code = reverse_condition_maybe_unordered (code);
+ reverse_p = true;
+ break;
+
+ case EQ:
+ case LT:
+ case LE:
+ case GT:
+ case GE:
+ reverse_p = false;
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
/* The E500 FP compare instructions toggle the GT bit (CR bit 1) only.
This explains the following mess. */
switch (code)
{
- case EQ: case UNEQ: case NE: case LTGT:
+ case EQ:
switch (op_mode)
{
case SFmode:
@@ -15922,7 +16810,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
}
break;
- case GT: case GTU: case UNGT: case UNGE: case GE: case GEU:
+ case GT:
+ case GE:
switch (op_mode)
{
case SFmode:
@@ -15948,7 +16837,8 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
}
break;
- case LT: case LTU: case UNLT: case UNLE: case LE: case LEU:
+ case LT:
+ case LE:
switch (op_mode)
{
case SFmode:
@@ -15973,24 +16863,16 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
gcc_unreachable ();
}
break;
+
default:
gcc_unreachable ();
}
/* Synthesize LE and GE from LT/GT || EQ. */
- if (code == LE || code == GE || code == LEU || code == GEU)
+ if (code == LE || code == GE)
{
emit_insn (cmp);
- switch (code)
- {
- case LE: code = LT; break;
- case GE: code = GT; break;
- case LEU: code = LT; break;
- case GEU: code = GT; break;
- default: gcc_unreachable ();
- }
-
compare_result2 = gen_reg_rtx (CCFPmode);
/* Do the EQ. */
@@ -16017,23 +16899,18 @@ rs6000_generate_compare (rtx cmp, enum machine_mode mode)
default:
gcc_unreachable ();
}
+
emit_insn (cmp);
/* OR them together. */
or_result = gen_reg_rtx (CCFPmode);
cmp = gen_e500_cr_ior_compare (or_result, compare_result,
- compare_result2);
+ compare_result2);
compare_result = or_result;
- code = EQ;
- }
- else
- {
- if (code == NE || code == LTGT)
- code = NE;
- else
- code = EQ;
}
+ code = reverse_p ? NE : EQ;
+
emit_insn (cmp);
}
else
@@ -16913,7 +17790,8 @@ emit_unlikely_jump (rtx cond, rtx label)
}
/* A subroutine of the atomic operation splitters. Emit a load-locked
- instruction in MODE. */
+ instruction in MODE. For QI/HImode, possibly use a pattern that includes
+ the zero_extend operation. */
static void
emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
@@ -16922,12 +17800,26 @@ emit_load_locked (enum machine_mode mode, rtx reg, rtx mem)
switch (mode)
{
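+ /* On ISA 2.07 (power8) targets the QImode/HImode cases presumably map to
+ lbarx/lharx and the TImode case to lqarx; see TARGET_SYNC_HI_QI and
+ TARGET_SYNC_TI in rs6000.h.  */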
+ case QImode:
+ fn = gen_load_lockedqi;
+ break;
+ case HImode:
+ fn = gen_load_lockedhi;
+ break;
case SImode:
- fn = gen_load_lockedsi;
+ if (GET_MODE (mem) == QImode)
+ fn = gen_load_lockedqi_si;
+ else if (GET_MODE (mem) == HImode)
+ fn = gen_load_lockedhi_si;
+ else
+ fn = gen_load_lockedsi;
break;
case DImode:
fn = gen_load_lockeddi;
break;
+ case TImode:
+ fn = gen_load_lockedti;
+ break;
default:
gcc_unreachable ();
}
@@ -16944,12 +17836,21 @@ emit_store_conditional (enum machine_mode mode, rtx res, rtx mem, rtx val)
switch (mode)
{
+ case QImode:
+ fn = gen_store_conditionalqi;
+ break;
+ case HImode:
+ fn = gen_store_conditionalhi;
+ break;
case SImode:
fn = gen_store_conditionalsi;
break;
case DImode:
fn = gen_store_conditionaldi;
break;
+ case TImode:
+ fn = gen_store_conditionalti;
+ break;
default:
gcc_unreachable ();
}
@@ -17046,8 +17947,9 @@ rs6000_adjust_atomic_subword (rtx orig_mem, rtx *pshift, rtx *pmask)
shift = gen_reg_rtx (SImode);
addr = gen_lowpart (SImode, addr);
emit_insn (gen_rlwinm (shift, addr, GEN_INT (3), GEN_INT (shift_mask)));
- shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
- shift, 1, OPTAB_LIB_WIDEN);
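+ /* On big-endian the addressed byte sits at the most-significant end of the
+ word, so the byte shift must be complemented; the little-endian shift is
+ already correct, hence the XOR is now conditional.  */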
+ if (WORDS_BIG_ENDIAN)
+ shift = expand_simple_binop (SImode, XOR, shift, GEN_INT (shift_mask),
+ shift, 1, OPTAB_LIB_WIDEN);
*pshift = shift;
/* Mask for insertion. */
@@ -17095,7 +17997,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
{
rtx boolval, retval, mem, oldval, newval, cond;
rtx label1, label2, x, mask, shift;
- enum machine_mode mode;
+ enum machine_mode mode, orig_mode;
enum memmodel mod_s, mod_f;
bool is_weak;
@@ -17107,22 +18009,29 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
is_weak = (INTVAL (operands[5]) != 0);
mod_s = (enum memmodel) INTVAL (operands[6]);
mod_f = (enum memmodel) INTVAL (operands[7]);
- mode = GET_MODE (mem);
+ orig_mode = mode = GET_MODE (mem);
mask = shift = NULL_RTX;
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask OLDVAL into position with the word. */
+ /* Before power8, we didn't have access to lbarx/lharx, so generate
+ lwarx and shift/mask operations. With power8, we need to do the
+ comparison in SImode, but the store is still done in QI/HImode. */
oldval = convert_modes (SImode, mode, oldval, 1);
- oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
- /* Shift and mask NEWVAL into position within the word. */
- newval = convert_modes (SImode, mode, newval, 1);
- newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (!TARGET_SYNC_HI_QI)
+ {
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
+
+ /* Shift and mask OLDVAL into position within the word. */
+ oldval = expand_simple_binop (SImode, ASHIFT, oldval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+
+ /* Shift and mask NEWVAL into position within the word. */
+ newval = convert_modes (SImode, mode, newval, 1);
+ newval = expand_simple_binop (SImode, ASHIFT, newval, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ }
/* Prepare to adjust the return value. */
retval = gen_reg_rtx (SImode);
@@ -17151,7 +18060,25 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
}
cond = gen_reg_rtx (CCmode);
- x = gen_rtx_COMPARE (CCmode, x, oldval);
+ /* If we have TImode, synthesize a comparison. */
+ if (mode != TImode)
+ x = gen_rtx_COMPARE (CCmode, x, oldval);
+ else
+ {
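+ /* The two 128-bit values are equal exactly when
+ (hi0 ^ hi1) | (lo0 ^ lo1) == 0, so compare that OR against zero
+ instead of requiring a TImode compare pattern.  */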
+ rtx xor1_result = gen_reg_rtx (DImode);
+ rtx xor2_result = gen_reg_rtx (DImode);
+ rtx or_result = gen_reg_rtx (DImode);
+ rtx new_word0 = simplify_gen_subreg (DImode, x, TImode, 0);
+ rtx new_word1 = simplify_gen_subreg (DImode, x, TImode, 8);
+ rtx old_word0 = simplify_gen_subreg (DImode, oldval, TImode, 0);
+ rtx old_word1 = simplify_gen_subreg (DImode, oldval, TImode, 8);
+
+ emit_insn (gen_xordi3 (xor1_result, new_word0, old_word0));
+ emit_insn (gen_xordi3 (xor2_result, new_word1, old_word1));
+ emit_insn (gen_iordi3 (or_result, xor1_result, xor2_result));
+ x = gen_rtx_COMPARE (CCmode, or_result, const0_rtx);
+ }
+
emit_insn (gen_rtx_SET (VOIDmode, cond, x));
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
@@ -17161,7 +18088,7 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (mask)
x = rs6000_mask_atomic_subword (retval, newval, mask);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (orig_mode, cond, mem, x);
if (!is_weak)
{
@@ -17179,6 +18106,8 @@ rs6000_expand_atomic_compare_and_swap (rtx operands[])
if (shift)
rs6000_finish_atomic_subword (operands[1], retval, shift);
+ else if (mode != GET_MODE (operands[1]))
+ convert_move (operands[1], retval, 1);
/* In all cases, CR0 contains EQ on success, and NE on failure. */
x = gen_rtx_EQ (SImode, cond, const0_rtx);
@@ -17202,7 +18131,7 @@ rs6000_expand_atomic_exchange (rtx operands[])
mode = GET_MODE (mem);
mask = shift = NULL_RTX;
- if (mode == QImode || mode == HImode)
+ if (!TARGET_SYNC_HI_QI && (mode == QImode || mode == HImode))
{
mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
@@ -17251,53 +18180,70 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
{
enum memmodel model = (enum memmodel) INTVAL (model_rtx);
enum machine_mode mode = GET_MODE (mem);
+ enum machine_mode store_mode = mode;
rtx label, x, cond, mask, shift;
rtx before = orig_before, after = orig_after;
mask = shift = NULL_RTX;
+ /* On power8, we want to use SImode for the operation. On previous systems,
+ do the operation on a full word and shift/mask to get the proper byte or
+ halfword. */
if (mode == QImode || mode == HImode)
{
- mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
-
- /* Shift and mask VAL into position with the word. */
- val = convert_modes (SImode, mode, val, 1);
- val = expand_simple_binop (SImode, ASHIFT, val, shift,
- NULL_RTX, 1, OPTAB_LIB_WIDEN);
+ if (TARGET_SYNC_HI_QI)
+ {
+ val = convert_modes (SImode, mode, val, 1);
- switch (code)
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ mode = SImode;
+ }
+ else
{
- case IOR:
- case XOR:
- /* We've already zero-extended VAL. That is sufficient to
- make certain that it does not affect other bits. */
- mask = NULL;
- break;
+ mem = rs6000_adjust_atomic_subword (mem, &shift, &mask);
- case AND:
- /* If we make certain that all of the other bits in VAL are
- set, that will be sufficient to not affect other bits. */
- x = gen_rtx_NOT (SImode, mask);
- x = gen_rtx_IOR (SImode, x, val);
- emit_insn (gen_rtx_SET (VOIDmode, val, x));
- mask = NULL;
- break;
+ /* Shift and mask VAL into position within the word. */
+ val = convert_modes (SImode, mode, val, 1);
+ val = expand_simple_binop (SImode, ASHIFT, val, shift,
+ NULL_RTX, 1, OPTAB_LIB_WIDEN);
- case NOT:
- case PLUS:
- case MINUS:
- /* These will all affect bits outside the field and need
- adjustment via MASK within the loop. */
- break;
+ switch (code)
+ {
+ case IOR:
+ case XOR:
+ /* We've already zero-extended VAL. That is sufficient to
+ make certain that it does not affect other bits. */
+ mask = NULL;
+ break;
- default:
- gcc_unreachable ();
- }
+ case AND:
+ /* If we make certain that all of the other bits in VAL are
+ set, that will be sufficient to not affect other bits. */
+ x = gen_rtx_NOT (SImode, mask);
+ x = gen_rtx_IOR (SImode, x, val);
+ emit_insn (gen_rtx_SET (VOIDmode, val, x));
+ mask = NULL;
+ break;
- /* Prepare to adjust the return value. */
- before = gen_reg_rtx (SImode);
- if (after)
- after = gen_reg_rtx (SImode);
- mode = SImode;
+ case NOT:
+ case PLUS:
+ case MINUS:
+ /* These will all affect bits outside the field and need
+ adjustment via MASK within the loop. */
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
+ /* Prepare to adjust the return value. */
+ before = gen_reg_rtx (SImode);
+ if (after)
+ after = gen_reg_rtx (SImode);
+ store_mode = mode = SImode;
+ }
}
mem = rs6000_pre_atomic_barrier (mem, model);
@@ -17330,9 +18276,11 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
NULL_RTX, 1, OPTAB_LIB_WIDEN);
x = rs6000_mask_atomic_subword (before, x, mask);
}
+ else if (store_mode != mode)
+ x = convert_modes (store_mode, mode, x, 1);
cond = gen_reg_rtx (CCmode);
- emit_store_conditional (mode, cond, mem, x);
+ emit_store_conditional (store_mode, cond, mem, x);
x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
emit_unlikely_jump (x, label);
@@ -17341,11 +18289,22 @@ rs6000_expand_atomic_op (enum rtx_code code, rtx mem, rtx val,
if (shift)
{
+ /* QImode/HImode on machines without lbarx/lharx where we do a lwarx and
+ then do the calculations in an SImode register. */
if (orig_before)
rs6000_finish_atomic_subword (orig_before, before, shift);
if (orig_after)
rs6000_finish_atomic_subword (orig_after, after, shift);
}
+ else if (store_mode != mode)
+ {
+ /* QImode/HImode on machines with lbarx/lharx where we do the native
+ operation and then do the calculations in an SImode register. */
+ if (orig_before)
+ convert_move (orig_before, before, 1);
+ if (orig_after)
+ convert_move (orig_after, after, 1);
+ }
else if (orig_after && after != orig_after)
emit_move_insn (orig_after, after);
}
@@ -22360,7 +23319,10 @@ output_toc (FILE *file, rtx x, int labelno, enum machine_mode mode)
fputs (DOUBLE_INT_ASM_OP, file);
else
fprintf (file, "\t.tc FS_%lx[TC],", l & 0xffffffff);
- fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
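+ /* The SFmode image presumably fills the high half of the doubleword TOC
+ slot on big-endian but the low half on little-endian, hence the trailing
+ zero word is emitted only for big-endian.  */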
+ if (WORDS_BIG_ENDIAN)
+ fprintf (file, "0x%lx00000000\n", l & 0xffffffff);
+ else
+ fprintf (file, "0x%lx\n", l & 0xffffffff);
return;
}
else
@@ -22951,6 +23913,7 @@ rs6000_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
|| rs6000_cpu_attr == CPU_POWER4
|| rs6000_cpu_attr == CPU_POWER5
|| rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8
|| rs6000_cpu_attr == CPU_CELL)
&& recog_memoized (dep_insn)
&& (INSN_CODE (dep_insn) >= 0))
@@ -23233,7 +24196,8 @@ is_microcoded_insn (rtx insn)
if (rs6000_cpu_attr == CPU_CELL)
return get_attr_cell_micro (insn) == CELL_MICRO_ALWAYS;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_EXT_U
@@ -23258,7 +24222,8 @@ is_cracked_insn (rtx insn)
|| GET_CODE (PATTERN (insn)) == CLOBBER)
return false;
- if (rs6000_sched_groups)
+ if (rs6000_sched_groups
+ && (rs6000_cpu == PROCESSOR_POWER4 || rs6000_cpu == PROCESSOR_POWER5))
{
enum attr_type type = get_attr_type (insn);
if (type == TYPE_LOAD_U || type == TYPE_STORE_U
@@ -23537,6 +24502,8 @@ rs6000_issue_rate (void)
case CPU_POWER6:
case CPU_POWER7:
return 5;
+ case CPU_POWER8:
+ return 7;
default:
return 1;
}
@@ -24164,6 +25131,39 @@ insn_must_be_first_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_CR_LOGICAL:
+ case TYPE_DELAYED_CR:
+ case TYPE_MFCR:
+ case TYPE_MFCRF:
+ case TYPE_MTCR:
+ case TYPE_COMPARE:
+ case TYPE_DELAYED_COMPARE:
+ case TYPE_VAR_DELAYED_COMPARE:
+ case TYPE_IMUL_COMPARE:
+ case TYPE_LMUL_COMPARE:
+ case TYPE_SYNC:
+ case TYPE_ISYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_U:
+ case TYPE_LOAD_UX:
+ case TYPE_LOAD_EXT:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ case TYPE_VECSTORE:
+ case TYPE_MFJMPR:
+ case TYPE_MTJMPR:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24242,6 +25242,25 @@ insn_must_be_last_in_group (rtx insn)
break;
}
break;
+ case PROCESSOR_POWER8:
+ type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_MFCR:
+ case TYPE_MTCR:
+ case TYPE_ISYNC:
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ case TYPE_STORE_C:
+ case TYPE_LOAD_EXT_U:
+ case TYPE_LOAD_EXT_UX:
+ case TYPE_STORE_UX:
+ return true;
+ default:
+ break;
+ }
+ break;
default:
break;
}
@@ -24331,8 +25350,9 @@ force_new_group (int sched_verbose, FILE *dump, rtx *group_insns,
if (can_issue_more && !is_branch_slot_insn (next_insn))
can_issue_more--;
- /* Power6 and Power7 have special group ending nop. */
- if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7)
+ /* Do we have a special group ending nop? */
+ if (rs6000_cpu_attr == CPU_POWER6 || rs6000_cpu_attr == CPU_POWER7
+ || rs6000_cpu_attr == CPU_POWER8)
{
nop = gen_group_ending_nop ();
emit_insn_before (nop, next_insn);
@@ -26513,7 +27533,8 @@ rs6000_register_move_cost (enum machine_mode mode,
/* For those processors that have slow LR/CTR moves, make them more
expensive than memory in order to bias spills to memory .*/
else if ((rs6000_cpu == PROCESSOR_POWER6
- || rs6000_cpu == PROCESSOR_POWER7)
+ || rs6000_cpu == PROCESSOR_POWER7
+ || rs6000_cpu == PROCESSOR_POWER8)
&& reg_classes_intersect_p (rclass, LINK_OR_CTR_REGS))
ret = 6 * hard_regno_nregs[0][mode];
@@ -26990,26 +28011,31 @@ bool
altivec_expand_vec_perm_const (rtx operands[4])
{
struct altivec_perm_insn {
+ HOST_WIDE_INT mask;
enum insn_code impl;
unsigned char perm[16];
};
static const struct altivec_perm_insn patterns[] = {
- { CODE_FOR_altivec_vpkuhum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuhum,
{ 1, 3, 5, 7, 9, 11, 13, 15, 17, 19, 21, 23, 25, 27, 29, 31 } },
- { CODE_FOR_altivec_vpkuwum,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vpkuwum,
{ 2, 3, 6, 7, 10, 11, 14, 15, 18, 19, 22, 23, 26, 27, 30, 31 } },
- { CODE_FOR_altivec_vmrghb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghb,
{ 0, 16, 1, 17, 2, 18, 3, 19, 4, 20, 5, 21, 6, 22, 7, 23 } },
- { CODE_FOR_altivec_vmrghh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghh,
{ 0, 1, 16, 17, 2, 3, 18, 19, 4, 5, 20, 21, 6, 7, 22, 23 } },
- { CODE_FOR_altivec_vmrghw,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrghw,
{ 0, 1, 2, 3, 16, 17, 18, 19, 4, 5, 6, 7, 20, 21, 22, 23 } },
- { CODE_FOR_altivec_vmrglb,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglb,
{ 8, 24, 9, 25, 10, 26, 11, 27, 12, 28, 13, 29, 14, 30, 15, 31 } },
- { CODE_FOR_altivec_vmrglh,
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglh,
{ 8, 9, 24, 25, 10, 11, 26, 27, 12, 13, 28, 29, 14, 15, 30, 31 } },
- { CODE_FOR_altivec_vmrglw,
- { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } }
+ { OPTION_MASK_ALTIVEC, CODE_FOR_altivec_vmrglw,
+ { 8, 9, 10, 11, 24, 25, 26, 27, 12, 13, 14, 15, 28, 29, 30, 31 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgew,
+ { 0, 1, 2, 3, 16, 17, 18, 19, 8, 9, 10, 11, 24, 25, 26, 27 } },
+ { OPTION_MASK_P8_VECTOR, CODE_FOR_p8_vmrgow,
+ { 4, 5, 6, 7, 20, 21, 22, 23, 12, 13, 14, 15, 28, 29, 30, 31 } }
};
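+  /* The new mask field records which ISA flag each pattern requires; patterns
+     whose mask is not present in rs6000_isa_flags are skipped in the search
+     below.  */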
unsigned int i, j, elt, which;
@@ -27109,6 +28135,9 @@ altivec_expand_vec_perm_const (rtx operands[4])
{
bool swapped;
+ if ((patterns[j].mask & rs6000_isa_flags) == 0)
+ continue;
+
elt = patterns[j].perm[0];
if (perm[0] == elt)
swapped = false;
@@ -27742,6 +28771,8 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{
{ "altivec", OPTION_MASK_ALTIVEC, false, true },
{ "cmpb", OPTION_MASK_CMPB, false, true },
+ { "crypto", OPTION_MASK_CRYPTO, false, true },
+ { "direct-move", OPTION_MASK_DIRECT_MOVE, false, true },
{ "dlmzb", OPTION_MASK_DLMZB, false, true },
{ "fprnd", OPTION_MASK_FPRND, false, true },
{ "hard-dfp", OPTION_MASK_DFP, false, true },
@@ -27750,13 +28781,17 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] =
{ "mfpgpr", OPTION_MASK_MFPGPR, false, true },
{ "mulhw", OPTION_MASK_MULHW, false, true },
{ "multiple", OPTION_MASK_MULTIPLE, false, true },
- { "update", OPTION_MASK_NO_UPDATE, true , true },
{ "popcntb", OPTION_MASK_POPCNTB, false, true },
{ "popcntd", OPTION_MASK_POPCNTD, false, true },
+ { "power8-fusion", OPTION_MASK_P8_FUSION, false, true },
+ { "power8-fusion-sign", OPTION_MASK_P8_FUSION_SIGN, false, true },
+ { "power8-vector", OPTION_MASK_P8_VECTOR, false, true },
{ "powerpc-gfxopt", OPTION_MASK_PPC_GFXOPT, false, true },
{ "powerpc-gpopt", OPTION_MASK_PPC_GPOPT, false, true },
+ { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true },
{ "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true },
{ "string", OPTION_MASK_STRING, false, true },
+ { "update", OPTION_MASK_NO_UPDATE, true , true },
{ "vsx", OPTION_MASK_VSX, false, true },
{ "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true },
#ifdef OPTION_MASK_64BIT
@@ -27798,6 +28833,8 @@ static struct rs6000_opt_mask const rs6000_builtin_mask_names[] =
{ "frsqrtes", RS6000_BTM_FRSQRTES, false, false },
{ "popcntd", RS6000_BTM_POPCNTD, false, false },
{ "cell", RS6000_BTM_CELL, false, false },
+ { "power8-vector", RS6000_BTM_P8_VECTOR, false, false },
+ { "crypto", RS6000_BTM_CRYPTO, false, false },
};
/* Option variables that we want to support inside attribute((target)) and
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 6549347b9b7..633d7891157 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -92,7 +92,7 @@
#ifdef HAVE_AS_POWER8
#define ASM_CPU_POWER8_SPEC "-mpower8"
#else
-#define ASM_CPU_POWER8_SPEC "-mpower4 -maltivec"
+#define ASM_CPU_POWER8_SPEC ASM_CPU_POWER7_SPEC
#endif
#ifdef HAVE_AS_DCI
@@ -164,6 +164,7 @@
%{mcpu=e6500: -me6500} \
%{maltivec: -maltivec} \
%{mvsx: -mvsx %{!maltivec: -maltivec} %{!mcpu*: %(asm_cpu_power7)}} \
+%{mpower8-vector|mcrypto|mdirect-move: %{!mcpu*: %(asm_cpu_power8)}} \
-many"
#define CPP_DEFAULT_SPEC ""
@@ -277,6 +278,19 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define TARGET_POPCNTD 0
#endif
+/* Define the ISA 2.07 flags as 0 if the target assembler does not support
+ the ISA 2.07 instructions. Allow -mpower8-fusion, since it does not add new
+ instructions. */
+
+#ifndef HAVE_AS_POWER8
+#undef TARGET_DIRECT_MOVE
+#undef TARGET_CRYPTO
+#undef TARGET_P8_VECTOR
+#define TARGET_DIRECT_MOVE 0
+#define TARGET_CRYPTO 0
+#define TARGET_P8_VECTOR 0
+#endif
+
/* Define TARGET_LWSYNC_INSTRUCTION if the assembler knows about lwsync. If
not, generate the lwsync code as an integer constant. */
#ifdef HAVE_AS_LWSYNC
@@ -386,6 +400,7 @@ extern const char *host_detect_local_cpu (int argc, const char **argv);
#define TARGET_DEBUG_TARGET (rs6000_debug & MASK_DEBUG_TARGET)
#define TARGET_DEBUG_BUILTIN (rs6000_debug & MASK_DEBUG_BUILTIN)
+/* Describe the vector unit used for arithmetic operations. */
extern enum rs6000_vector rs6000_vector_unit[];
#define VECTOR_UNIT_NONE_P(MODE) \
@@ -394,12 +409,25 @@ extern enum rs6000_vector rs6000_vector_unit[];
#define VECTOR_UNIT_VSX_P(MODE) \
(rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+#define VECTOR_UNIT_P8_VECTOR_P(MODE) \
+ (rs6000_vector_unit[(MODE)] == VECTOR_P8_VECTOR)
+
#define VECTOR_UNIT_ALTIVEC_P(MODE) \
(rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC)
+#define VECTOR_UNIT_VSX_OR_P8_VECTOR_P(MODE) \
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
+ (int)VECTOR_VSX, \
+ (int)VECTOR_P8_VECTOR))
+
+/* VECTOR_UNIT_ALTIVEC_OR_VSX_P is used in places where we are using either
+ altivec (VMX) or VSX vector instructions. P8 vector support is upwards
+ compatible, so allow it as well, rather than changing all of the uses of the
+ macro. */
#define VECTOR_UNIT_ALTIVEC_OR_VSX_P(MODE) \
- (rs6000_vector_unit[(MODE)] == VECTOR_ALTIVEC \
- || rs6000_vector_unit[(MODE)] == VECTOR_VSX)
+ (IN_RANGE ((int)rs6000_vector_unit[(MODE)], \
+ (int)VECTOR_ALTIVEC, \
+ (int)VECTOR_P8_VECTOR))
/* Describe whether to use VSX loads or Altivec loads. For now, just use the
same unit as the vector unit we are using, but we may want to migrate to
@@ -412,12 +440,21 @@ extern enum rs6000_vector rs6000_vector_mem[];
#define VECTOR_MEM_VSX_P(MODE) \
(rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+#define VECTOR_MEM_P8_VECTOR_P(MODE) \
+ (rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+
#define VECTOR_MEM_ALTIVEC_P(MODE) \
(rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC)
+#define VECTOR_MEM_VSX_OR_P8_VECTOR_P(MODE) \
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
+ (int)VECTOR_VSX, \
+ (int)VECTOR_P8_VECTOR))
+
#define VECTOR_MEM_ALTIVEC_OR_VSX_P(MODE) \
- (rs6000_vector_mem[(MODE)] == VECTOR_ALTIVEC \
- || rs6000_vector_mem[(MODE)] == VECTOR_VSX)
+ (IN_RANGE ((int)rs6000_vector_mem[(MODE)], \
+ (int)VECTOR_ALTIVEC, \
+ (int)VECTOR_P8_VECTOR))
/* Return the alignment of a given vector type, which is set based on the
vector unit use. VSX for instance can load 32 or 64 bit aligned words
@@ -479,6 +516,15 @@ extern int rs6000_vector_align[];
#define TARGET_FCTIDUZ TARGET_POPCNTD
#define TARGET_FCTIWUZ TARGET_POPCNTD
+#define TARGET_XSCVDPSPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+#define TARGET_XSCVSPDPN (TARGET_DIRECT_MOVE || TARGET_P8_VECTOR)
+
+/* Byte/char syncs were phased in as part of ISA 2.06B, but are not present
+ in power7, so conditionalize them on p8 features. TImode syncs need quad
+ memory support. */
+#define TARGET_SYNC_HI_QI (TARGET_QUAD_MEMORY || TARGET_DIRECT_MOVE)
+#define TARGET_SYNC_TI TARGET_QUAD_MEMORY
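+
+/* With TARGET_SYNC_TI, TImode/__int128 atomics can presumably be expanded
+   inline with the quad-word load-locked/store-conditional instructions
+   (lqarx/stqcx.); see the TImode cases added to emit_load_locked and
+   emit_store_conditional in rs6000.c.  */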
+
/* Power7 has both 32-bit load and store integer for the FPRs, so we don't need
to allocate the SDmode stack slot to get the value into the proper location
in the register. */
@@ -489,10 +535,13 @@ extern int rs6000_vector_align[];
OPTION_MASK_<xxx> back into MASK_<xxx>. */
#define MASK_ALTIVEC OPTION_MASK_ALTIVEC
#define MASK_CMPB OPTION_MASK_CMPB
+#define MASK_CRYPTO OPTION_MASK_CRYPTO
#define MASK_DFP OPTION_MASK_DFP
+#define MASK_DIRECT_MOVE OPTION_MASK_DIRECT_MOVE
#define MASK_DLMZB OPTION_MASK_DLMZB
#define MASK_EABI OPTION_MASK_EABI
#define MASK_FPRND OPTION_MASK_FPRND
+#define MASK_P8_FUSION OPTION_MASK_P8_FUSION
#define MASK_HARD_FLOAT OPTION_MASK_HARD_FLOAT
#define MASK_ISEL OPTION_MASK_ISEL
#define MASK_MFCRF OPTION_MASK_MFCRF
@@ -500,6 +549,7 @@ extern int rs6000_vector_align[];
#define MASK_MULHW OPTION_MASK_MULHW
#define MASK_MULTIPLE OPTION_MASK_MULTIPLE
#define MASK_NO_UPDATE OPTION_MASK_NO_UPDATE
+#define MASK_P8_VECTOR OPTION_MASK_P8_VECTOR
#define MASK_POPCNTB OPTION_MASK_POPCNTB
#define MASK_POPCNTD OPTION_MASK_POPCNTD
#define MASK_PPC_GFXOPT OPTION_MASK_PPC_GFXOPT
@@ -665,6 +715,11 @@ extern unsigned char rs6000_recip_bits[];
instructions for them. Might as well be consistent with bits and bytes. */
#define WORDS_BIG_ENDIAN 1
+/* This says that for the IBM long double the larger magnitude double
+ comes first. It's really a two element double array, and arrays
+ don't index differently between little- and big-endian. */
+#define LONG_DOUBLE_LARGE_FIRST 1
+
#define MAX_BITS_PER_WORD 64
/* Width of a word, in units (bytes). */
@@ -758,12 +813,6 @@ extern unsigned rs6000_pointer_size;
/* No data type wants to be aligned rounder than this. */
#define BIGGEST_ALIGNMENT 128
-/* A C expression to compute the alignment for a variables in the
- local store. TYPE is the data type, and ALIGN is the alignment
- that the object would ordinarily have. */
-#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
- DATA_ALIGNMENT (TYPE, ALIGN)
-
/* Alignment of field after `int : 0' in a structure. */
#define EMPTY_FIELD_BOUNDARY 32
@@ -773,8 +822,15 @@ extern unsigned rs6000_pointer_size;
/* A bit-field declared as `int' forces `int' alignment for the struct. */
#define PCC_BITFIELD_TYPE_MATTERS 1
-/* Make strings word-aligned so strcpy from constants will be faster.
- Make vector constants quadword aligned. */
+enum data_align { align_abi, align_opt, align_both };
+
+/* A C expression to compute the alignment for a variable in the
+ local store. TYPE is the data type, and ALIGN is the alignment
+ that the object would ordinarily have. */
+#define LOCAL_ALIGNMENT(TYPE, ALIGN) \
+ rs6000_data_alignment (TYPE, ALIGN, align_both)
+
+/* Make strings word-aligned so strcpy from constants will be faster. */
#define CONSTANT_ALIGNMENT(EXP, ALIGN) \
(TREE_CODE (EXP) == STRING_CST \
&& (STRICT_ALIGNMENT || !optimize_size) \
@@ -782,21 +838,14 @@ extern unsigned rs6000_pointer_size;
? BITS_PER_WORD \
: (ALIGN))
-/* Make arrays of chars word-aligned for the same reasons.
- Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
+/* Make arrays of chars word-aligned for the same reasons. */
+#define DATA_ALIGNMENT(TYPE, ALIGN) \
+ rs6000_data_alignment (TYPE, ALIGN, align_opt)
+
+/* Align vectors to 128 bits. Align SPE vectors and E500 v2 doubles to
64 bits. */
-#define DATA_ALIGNMENT(TYPE, ALIGN) \
- (TREE_CODE (TYPE) == VECTOR_TYPE \
- ? (((TARGET_SPE && SPE_VECTOR_MODE (TYPE_MODE (TYPE))) \
- || (TARGET_PAIRED_FLOAT && PAIRED_VECTOR_MODE (TYPE_MODE (TYPE)))) \
- ? 64 : 128) \
- : ((TARGET_E500_DOUBLE \
- && TREE_CODE (TYPE) == REAL_TYPE \
- && TYPE_MODE (TYPE) == DFmode) \
- ? 64 \
- : (TREE_CODE (TYPE) == ARRAY_TYPE \
- && TYPE_MODE (TREE_TYPE (TYPE)) == QImode \
- && (ALIGN) < BITS_PER_WORD) ? BITS_PER_WORD : (ALIGN)))
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) \
+ rs6000_data_alignment (TYPE, ALIGN, align_abi)
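+
+/* DATA_ABI_ALIGNMENT is meant to give only the ABI-required alignment, while
+   DATA_ALIGNMENT may add optimization-only over-alignment; rs6000_data_alignment
+   presumably distinguishes the two through its align_abi/align_opt/align_both
+   argument.  */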
/* Nonzero if move instructions will actually fail to work
when given unaligned data. */
@@ -1002,7 +1051,9 @@ extern unsigned rs6000_pointer_size;
#define REG_ALLOC_ORDER \
{32, \
- 45, 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, \
+ /* move fr13 (ie 45) later, so if we need TFmode, it does */ \
+ /* not use fr14 which is a saved register. */ \
+ 44, 43, 42, 41, 40, 39, 38, 37, 36, 35, 34, 45, \
33, \
63, 62, 61, 60, 59, 58, 57, 56, 55, 54, 53, 52, 51, \
50, 49, 48, 47, 46, \
@@ -1062,8 +1113,14 @@ extern unsigned rs6000_pointer_size;
#define VINT_REGNO_P(N) ALTIVEC_REGNO_P (N)
/* Alternate name for any vector register supporting logical operations, no
- matter which instruction set(s) are available. */
-#define VLOGICAL_REGNO_P(N) VFLOAT_REGNO_P (N)
+ matter which instruction set(s) are available. For 64-bit mode, we also
+ allow logical operations in the GPRS. This is to allow atomic quad word
+ builtins not to need the VSX registers for lqarx/stqcx. It also helps with
+ __int128_t arguments that are passed in GPRs. */
+#define VLOGICAL_REGNO_P(N) \
+ (ALTIVEC_REGNO_P (N) \
+ || (TARGET_VSX && FP_REGNO_P (N)) \
+ || (TARGET_VSX && TARGET_POWERPC64 && INT_REGNO_P (N)))
/* Return number of consecutive hard regs needed starting at reg REGNO
to hold something of mode MODE. */
@@ -1124,7 +1181,7 @@ extern unsigned rs6000_pointer_size;
when one has mode MODE1 and one has mode MODE2.
If HARD_REGNO_MODE_OK could produce different values for MODE1 and MODE2,
for any hard reg, then this must be 0 for correct output. */
-#define MODES_TIEABLE_P(MODE1, MODE2) \
+#define MODES_TIEABLE_P(MODE1, MODE2) \
(SCALAR_FLOAT_MODE_P (MODE1) \
? SCALAR_FLOAT_MODE_P (MODE2) \
: SCALAR_FLOAT_MODE_P (MODE2) \
@@ -1137,14 +1194,14 @@ extern unsigned rs6000_pointer_size;
? SPE_VECTOR_MODE (MODE2) \
: SPE_VECTOR_MODE (MODE2) \
? SPE_VECTOR_MODE (MODE1) \
- : ALTIVEC_VECTOR_MODE (MODE1) \
- ? ALTIVEC_VECTOR_MODE (MODE2) \
- : ALTIVEC_VECTOR_MODE (MODE2) \
- ? ALTIVEC_VECTOR_MODE (MODE1) \
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
? ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
: ALTIVEC_OR_VSX_VECTOR_MODE (MODE2) \
? ALTIVEC_OR_VSX_VECTOR_MODE (MODE1) \
+ : ALTIVEC_VECTOR_MODE (MODE1) \
+ ? ALTIVEC_VECTOR_MODE (MODE2) \
+ : ALTIVEC_VECTOR_MODE (MODE2) \
+ ? ALTIVEC_VECTOR_MODE (MODE1) \
: 1)
/* Post-reload, we can't use any new AltiVec registers, as we already
@@ -1337,8 +1394,11 @@ enum r6000_reg_class_enum {
RS6000_CONSTRAINT_wg, /* FPR register for -mmfpgpr */
RS6000_CONSTRAINT_wf, /* VSX register for V4SF */
RS6000_CONSTRAINT_wl, /* FPR register for LFIWAX */
+ RS6000_CONSTRAINT_wm, /* VSX register for direct move */
+ RS6000_CONSTRAINT_wr, /* GPR register if 64-bit */
RS6000_CONSTRAINT_ws, /* VSX register for DF */
RS6000_CONSTRAINT_wt, /* VSX register for TImode */
+ RS6000_CONSTRAINT_wv, /* Altivec register for power8 vector */
RS6000_CONSTRAINT_wx, /* FPR register for STFIWX */
RS6000_CONSTRAINT_wz, /* FPR register for LFIWZX */
RS6000_CONSTRAINT_MAX
@@ -2297,6 +2357,13 @@ extern char rs6000_reg_names[][8]; /* register names (0 vs. %r0). */
/* How to align the given loop. */
#define LOOP_ALIGN(LABEL) rs6000_loop_align(LABEL)
+/* Alignment guaranteed by __builtin_malloc. */
+/* FIXME: 128-bit alignment is guaranteed by glibc for TARGET_64BIT.
+ However, specifying the stronger guarantee currently leads to
+ a regression in SPEC CPU2006 437.leslie3d. The stronger
+ guarantee should be implemented here once that's fixed. */
+#define MALLOC_ABI_ALIGNMENT (64)
+
/* Pick up the return address upon entry to a procedure. Used for
dwarf2 unwind information. This also enables the table driven
mechanism. */
@@ -2365,6 +2432,8 @@ extern int frame_pointer_needed;
#define RS6000_BTM_ALWAYS 0 /* Always enabled. */
#define RS6000_BTM_ALTIVEC MASK_ALTIVEC /* VMX/altivec vectors. */
#define RS6000_BTM_VSX MASK_VSX /* VSX (vector/scalar). */
+#define RS6000_BTM_P8_VECTOR MASK_P8_VECTOR /* ISA 2.07 vector. */
+#define RS6000_BTM_CRYPTO MASK_CRYPTO /* crypto funcs. */
#define RS6000_BTM_SPE MASK_STRING /* E500 */
#define RS6000_BTM_PAIRED MASK_MULHW /* 750CL paired insns. */
#define RS6000_BTM_FRE MASK_POPCNTB /* FRE instruction. */
@@ -2376,6 +2445,8 @@ extern int frame_pointer_needed;
#define RS6000_BTM_COMMON (RS6000_BTM_ALTIVEC \
| RS6000_BTM_VSX \
+ | RS6000_BTM_P8_VECTOR \
+ | RS6000_BTM_CRYPTO \
| RS6000_BTM_FRE \
| RS6000_BTM_FRES \
| RS6000_BTM_FRSQRTE \
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 1e65ac1cde0..010e21f7413 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -127,6 +127,13 @@
UNSPEC_LFIWZX
UNSPEC_FCTIWUZ
UNSPEC_GRP_END_NOP
+ UNSPEC_P8V_FMRGOW
+ UNSPEC_P8V_MTVSRWZ
+ UNSPEC_P8V_RELOAD_FROM_GPR
+ UNSPEC_P8V_MTVSRD
+ UNSPEC_P8V_XXPERMDI
+ UNSPEC_P8V_RELOAD_FROM_VSX
+ UNSPEC_FUSION_GPR
])
;;
@@ -146,7 +153,7 @@
;; Define an insn type attribute. This is used in function unit delay
;; computations.
-(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt"
+(define_attr "type" "integer,two,three,load,load_ext,load_ext_u,load_ext_ux,load_ux,load_u,store,store_ux,store_u,fpload,fpload_ux,fpload_u,fpstore,fpstore_ux,fpstore_u,vecload,vecstore,imul,imul2,imul3,lmul,idiv,ldiv,insert_word,branch,cmp,fast_compare,compare,var_delayed_compare,delayed_compare,imul_compare,lmul_compare,fpcompare,cr_logical,delayed_cr,mfcr,mfcrf,mtcr,mfjmpr,mtjmpr,fp,fpsimple,dmul,sdiv,ddiv,ssqrt,dsqrt,jmpreg,brinc,vecsimple,veccomplex,vecdiv,veccmp,veccmpsimple,vecperm,vecfloat,vecfdiv,vecdouble,isync,sync,load_l,store_c,shift,trap,insert_dword,var_shift_rotate,cntlz,exts,mffgpr,mftgpr,isel,popcnt,crypto"
(const_string "integer"))
;; Define floating point instruction sub-types for use with Xfpu.md
@@ -166,9 +173,14 @@
(const_int 4)))
;; Processor type -- this attribute must exactly match the processor_type
-;; enumeration in rs6000.h.
-
-(define_attr "cpu" "rs64a,mpccore,ppc403,ppc405,ppc440,ppc476,ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,ppc750,ppc7400,ppc7450,ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,power4,power5,power6,power7,cell,ppca2,titan"
+;; enumeration in rs6000-opts.h.
+(define_attr "cpu"
+ "ppc601,ppc603,ppc604,ppc604e,ppc620,ppc630,
+ ppc750,ppc7400,ppc7450,
+ ppc403,ppc405,ppc440,ppc476,
+ ppc8540,ppc8548,ppce300c2,ppce300c3,ppce500mc,ppce500mc64,ppce5500,ppce6500,
+ power4,power5,power6,power7,power8,
+ rs64a,mpccore,cell,ppca2,titan"
(const (symbol_ref "rs6000_cpu_attr")))
@@ -201,6 +213,7 @@
(include "power5.md")
(include "power6.md")
(include "power7.md")
+(include "power8.md")
(include "cell.md")
(include "xfpu.md")
(include "a2.md")
@@ -227,6 +240,12 @@
; extend modes for DImode
(define_mode_iterator QHSI [QI HI SI])
+; QImode or HImode for small atomic ops
+(define_mode_iterator QHI [QI HI])
+
+; HImode or SImode for sign extended fusion ops
+(define_mode_iterator HSI [HI SI])
+
; SImode or DImode, even if DImode doesn't fit in GPRs.
(define_mode_iterator SDI [SI DI])
@@ -268,6 +287,15 @@
(define_mode_iterator FMOVE128 [(TF "!TARGET_IEEEQUAD && TARGET_LONG_DOUBLE_128")
(TD "TARGET_HARD_FLOAT && TARGET_FPRS")])
+; Iterators for 128 bit types for direct move
+(define_mode_iterator FMOVE128_GPR [(TI "TARGET_VSX_TIMODE")
+ (V16QI "")
+ (V8HI "")
+ (V4SI "")
+ (V4SF "")
+ (V2DI "")
+ (V2DF "")])
+
; Whether a floating point move is ok, don't allow SD without hardware FP
(define_mode_attr fmove_ok [(SF "")
(DF "")
@@ -284,11 +312,16 @@
(define_mode_attr f32_lr [(SF "f") (SD "wz")])
(define_mode_attr f32_lm [(SF "m") (SD "Z")])
(define_mode_attr f32_li [(SF "lfs%U1%X1 %0,%1") (SD "lfiwzx %0,%y1")])
+(define_mode_attr f32_lv [(SF "lxsspx %0,%y1") (SD "lxsiwzx %0,%y1")])
; Definitions for store from 32-bit fpr register
(define_mode_attr f32_sr [(SF "f") (SD "wx")])
(define_mode_attr f32_sm [(SF "m") (SD "Z")])
(define_mode_attr f32_si [(SF "stfs%U0%X0 %1,%0") (SD "stfiwx %1,%y0")])
+(define_mode_attr f32_sv [(SF "stxsspx %1,%y0") (SD "stxsiwzx %1,%y0")])
+
+; Definitions for 32-bit fpr direct move
+(define_mode_attr f32_dm [(SF "wn") (SD "wm")])
; These modes do not fit in integer registers in 32-bit mode.
; but on e500v2, the gpr are 64 bit registers
@@ -368,7 +401,7 @@
(define_insn "*zero_extend<mode>di2_internal1"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(zero_extend:DI (match_operand:QHSI 1 "reg_or_mem_operand" "m,r")))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64 && (<MODE>mode != SImode || !TARGET_LFIWZX)"
"@
l<wd>z%U1%X1 %0,%1
rldicl %0,%1,0,<dbits>"
@@ -434,6 +467,29 @@
(const_int 0)))]
"")
+(define_insn "*zero_extendsidi2_lfiwzx"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wz,!wm")
+ (zero_extend:DI (match_operand:SI 1 "reg_or_mem_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWZX"
+ "@
+ lwz%U1%X1 %0,%1
+ rldicl %0,%1,0,32
+ mtvsrwz %x0,%1
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_u")
+ (const_string "load")))
+ (const_string "*")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
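+
+;; The extra wm/wz alternatives above let a 32-to-64-bit zero extend stay in
+;; the GPRs, be moved into a VSX register with mtvsrwz, or be loaded directly
+;; with lfiwzx/lxsiwzx, so the register allocator can keep such values in
+;; vector registers when that is profitable.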
+
(define_insn "extendqidi2"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:QI 1 "gpc_reg_operand" "r")))]
@@ -581,10 +637,33 @@
"TARGET_POWERPC64"
"")
-(define_insn ""
+(define_insn "*extendsidi2_lfiwax"
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=r,r,??wm,!wl,!wm")
+ (sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r,r,Z,Z")))]
+ "TARGET_POWERPC64 && TARGET_LFIWAX"
+ "@
+ lwa%U1%X1 %0,%1
+ extsw %0,%1
+ mtvsrwa %x0,%1
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1"
+ [(set_attr_alternative "type"
+ [(if_then_else
+ (match_test "update_indexed_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_ux")
+ (if_then_else
+ (match_test "update_address_mem (operands[1], VOIDmode)")
+ (const_string "load_ext_u")
+ (const_string "load_ext")))
+ (const_string "exts")
+ (const_string "mffgpr")
+ (const_string "fpload")
+ (const_string "fpload")])])
+
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r,r")
(sign_extend:DI (match_operand:SI 1 "lwa_operand" "m,r")))]
- "TARGET_POWERPC64 && rs6000_gen_cell_microcode"
+ "TARGET_POWERPC64 && rs6000_gen_cell_microcode && !TARGET_LFIWAX"
"@
lwa%U1%X1 %0,%1
extsw %0,%1"
@@ -598,7 +677,7 @@
(const_string "load_ext")))
(const_string "exts")])])
-(define_insn ""
+(define_insn "*extendsidi2_nocell"
[(set (match_operand:DI 0 "gpc_reg_operand" "=r")
(sign_extend:DI (match_operand:SI 1 "gpc_reg_operand" "r")))]
"TARGET_POWERPC64 && !rs6000_gen_cell_microcode"
@@ -2035,7 +2114,9 @@
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
(unspec:GPR [(match_operand:GPR 1 "gpc_reg_operand" "r")] UNSPEC_PARITY))]
"TARGET_CMPB && TARGET_POPCNTB"
- "prty<wd> %0,%1")
+ "prty<wd> %0,%1"
+ [(set_attr "length" "4")
+ (set_attr "type" "popcnt")])
(define_expand "parity<mode>2"
[(set (match_operand:GPR 0 "gpc_reg_operand" "")
@@ -4316,7 +4397,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -4348,7 +4429,7 @@
#
#
#"
- [(set_attr "type" "delayed_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
+ [(set_attr "type" "fast_compare,var_delayed_compare,delayed_compare,delayed_compare,var_delayed_compare,delayed_compare")
(set_attr "length" "4,4,4,8,8,8")])
(define_split
@@ -5104,6 +5185,41 @@
"frsqrtes %0,%1"
[(set_attr "type" "fp")])
+;; This expander is here to avoid FLOAT_WORDS_BIGENDIAN tests in
+;; builtins.c and optabs.c that are not correct for IBM long double
+;; when little-endian.
+(define_expand "signbittf2"
+ [(set (match_dup 2)
+ (float_truncate:DF (match_operand:TF 1 "gpc_reg_operand" "")))
+ (set (match_dup 3)
+ (subreg:DI (match_dup 2) 0))
+ (set (match_dup 4)
+ (match_dup 5))
+ (set (match_operand:SI 0 "gpc_reg_operand" "")
+ (match_dup 6))]
+ "!TARGET_IEEEQUAD
+ && TARGET_HARD_FLOAT
+ && (TARGET_FPRS || TARGET_E500_DOUBLE)
+ && TARGET_LONG_DOUBLE_128"
+{
+ operands[2] = gen_reg_rtx (DFmode);
+ operands[3] = gen_reg_rtx (DImode);
+ if (TARGET_POWERPC64)
+ {
+ operands[4] = gen_reg_rtx (DImode);
+ operands[5] = gen_rtx_LSHIFTRT (DImode, operands[3], GEN_INT (63));
+ operands[6] = gen_rtx_SUBREG (SImode, operands[4],
+ WORDS_BIG_ENDIAN ? 4 : 0);
+ }
+ else
+ {
+ operands[4] = gen_reg_rtx (SImode);
+ operands[5] = gen_rtx_SUBREG (SImode, operands[3],
+ WORDS_BIG_ENDIAN ? 0 : 4);
+ operands[6] = gen_rtx_LSHIFTRT (SImode, operands[4], GEN_INT (31));
+ }
+})
+
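For readers unfamiliar with IBM double-double, the expander above boils down to "take the sign bit of the high double". A minimal C sketch of the equivalent computation (hypothetical helper, not part of the patch):

#include <string.h>

/* Sign bit of an IBM long double: the high double carries the sign.  */
int signbit_ibm128 (long double x)
{
  double hi = (double) x;                 /* the float_truncate:DF step */
  unsigned long long bits;
  memcpy (&bits, &hi, sizeof bits);       /* the subreg:DI step */
  return (int) (bits >> 63);              /* 64-bit path; the 32-bit path
                                             shifts the high word by 31 */
}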
(define_expand "copysign<mode>3"
[(set (match_dup 3)
(abs:SFDF (match_operand:SFDF 1 "gpc_reg_operand" "")))
@@ -5553,12 +5669,15 @@
; We don't define lfiwax/lfiwzx with the normal definition, because we
; don't want to support putting SImode in FPR registers.
(define_insn "lfiwax"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWAX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWAX"
- "lfiwax %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwax %0,%y1
+ lxsiwax %x0,%y1
+ mtvsrwa %x0,%1"
+ [(set_attr "type" "fpload,fpload,mffgpr")])
; This split must be run before register allocation because it allocates the
; memory slot that is needed to move values to/from the FPR. We don't allocate
@@ -5580,7 +5699,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, false);
else
{
@@ -5629,12 +5749,15 @@
(set_attr "type" "fpload")])
(define_insn "lfiwzx"
- [(set (match_operand:DI 0 "gpc_reg_operand" "=d")
- (unspec:DI [(match_operand:SI 1 "indexed_or_indirect_operand" "Z")]
+ [(set (match_operand:DI 0 "gpc_reg_operand" "=d,wm,!wm")
+ (unspec:DI [(match_operand:SI 1 "reg_or_indexed_operand" "Z,Z,r")]
UNSPEC_LFIWZX))]
"TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT && TARGET_LFIWZX"
- "lfiwzx %0,%y1"
- [(set_attr "type" "fpload")])
+ "@
+ lfiwzx %0,%y1
+ lxsiwzx %x0,%y1
+ mtvsrwz %x0,%1"
+ [(set_attr "type" "fpload,fpload,mftgpr")])
(define_insn_and_split "floatunssi<mode>2_lfiwzx"
[(set (match_operand:SFDF 0 "gpc_reg_operand" "=d")
@@ -5651,7 +5774,8 @@
rtx src = operands[1];
rtx tmp;
- if (!MEM_P (src) && TARGET_MFPGPR && TARGET_POWERPC64)
+ if (!MEM_P (src) && TARGET_POWERPC64
+ && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
tmp = convert_to_mode (DImode, src, true);
else
{
@@ -5942,7 +6066,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -6036,7 +6160,7 @@
emit_insn (gen_stfiwx (dest, tmp));
DONE;
}
- else if (TARGET_MFPGPR && TARGET_POWERPC64)
+ else if (TARGET_POWERPC64 && (TARGET_MFPGPR || TARGET_DIRECT_MOVE))
{
dest = gen_lowpart (DImode, dest);
emit_move_insn (dest, tmp);
@@ -8285,6 +8409,18 @@
(compare:CC (match_dup 0)
(const_int 0)))]
"")
+
+;; Eqv operation.
+(define_insn "*eqv<mode>3"
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ (not:GPR
+ (xor:GPR (match_operand:GPR 1 "gpc_reg_operand" "r")
+ (match_operand:GPR 2 "gpc_reg_operand" "r"))))]
+ ""
+ "eqv %0,%1,%2"
+ [(set_attr "type" "integer")
+ (set_attr "length" "4")])
+
;; Now define ways of moving data around.
@@ -8490,7 +8626,7 @@
cmp<wd>i %2,%0,0
mr. %0,%1
#"
- [(set_attr "type" "cmp,compare,cmp")
+ [(set_attr "type" "cmp,fast_compare,cmp")
(set_attr "length" "4,4,8")])
(define_split
@@ -8680,8 +8816,8 @@
}")
(define_insn "mov<mode>_hardfloat"
- [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,*c*l,!r,*h,!r,!r")
- (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,r,h,0,G,Fn"))]
+ [(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=!r,!r,m,f,wa,wa,<f32_lr>,<f32_sm>,wm,Z,?<f32_dm>,?r,*c*l,!r,*h,!r,!r")
+ (match_operand:FMOVE32 1 "input_operand" "r,m,r,f,wa,j,<f32_lm>,<f32_sr>,Z,wm,r,<f32_dm>,r,h,0,G,Fn"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_SINGLE_FLOAT)"
@@ -8694,6 +8830,10 @@
xxlxor %x0,%x0,%x0
<f32_li>
<f32_si>
+ <f32_lv>
+ <f32_sv>
+ mtvsrwz %x0,%1
+ mfvsrwz %0,%x1
mt%0 %1
mf%1 %0
nop
@@ -8732,16 +8872,20 @@
(match_test "update_address_mem (operands[0], VOIDmode)")
(const_string "fpstore_u")
(const_string "fpstore")))
+ (const_string "fpload")
+ (const_string "fpstore")
+ (const_string "mftgpr")
+ (const_string "mffgpr")
(const_string "mtjmpr")
(const_string "mfjmpr")
(const_string "*")
(const_string "*")
(const_string "*")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,8")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8")])
(define_insn "*mov<mode>_softfloat"
[(set (match_operand:FMOVE32 0 "nonimmediate_operand" "=r,cl,r,r,m,r,r,r,r,*h")
- (match_operand:FMOVE32 1 "input_operand" "r, r,h,m,r,I,L,G,Fn,0"))]
+ (match_operand:FMOVE32 1 "input_operand" "r,r,h,m,r,I,L,G,Fn,0"))]
"(gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))
&& (TARGET_SOFT_FLOAT || !TARGET_FPRS)"
@@ -8954,8 +9098,8 @@
; ld/std require word-aligned displacements -> 'Y' constraint.
; List Y->r and r->Y before r->r for reload.
(define_insn "*mov<mode>_hardfloat64"
- [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg")
- (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r"))]
+ [(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=m,d,d,ws,?wa,Z,?Z,ws,?wa,wa,Y,r,!r,*c*l,!r,*h,!r,!r,!r,r,wg,r,wm")
+ (match_operand:FMOVE64 1 "input_operand" "d,m,d,Z,Z,ws,wa,ws,wa,j,r,Y,r,r,h,0,G,H,F,wg,r,wm,r"))]
"TARGET_POWERPC64 && TARGET_HARD_FLOAT && TARGET_FPRS && TARGET_DOUBLE_FLOAT
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode))"
@@ -8980,7 +9124,9 @@
#
#
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9038,8 +9184,10 @@
(const_string "*")
(const_string "*")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4")])
+ (set_attr "length" "4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,8,12,16,4,4,4,4")])
(define_insn "*mov<mode>_softfloat64"
[(set (match_operand:FMOVE64 0 "nonimmediate_operand" "=Y,r,r,cl,r,r,r,r,*h")
@@ -9154,8 +9302,8 @@
"&& reload_completed"
[(pc)]
{
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word),
operands[1]);
emit_move_insn (simplify_gen_subreg (DFmode, operands[0], TFmode, lo_word),
@@ -9384,8 +9532,8 @@
&& TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -9419,6 +9567,216 @@
})
+;; Power8 merge instructions to allow direct move to/from floating point
+;; registers in 32-bit mode. We use TF mode to get two registers to move the
+;; individual 32-bit parts across. Subreg doesn't work too well on the TF
+;; value, since it is allocated in reload and not all of the flow information
+;; is set up for it. We have two patterns to do the two moves between gprs and
+;; fprs. There isn't a dependency between the two, but we could potentially
+;; schedule other instructions between the two instructions. TFmode is
+;; currently limited to traditional FPR registers. If/when this is changed, we
+;; will need to revisit %L to make sure it works with VSX registers, or add an
+;; %x version of %L.
+
+(define_insn "p8_fmrgow_<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=d")
+ (unspec:FMOVE64X [(match_operand:TF 1 "register_operand" "d")]
+ UNSPEC_P8V_FMRGOW))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "fmrgow %0,%1,%L1"
+ [(set_attr "type" "vecperm")])
+
+(define_insn "p8_mtvsrwz_1"
+ [(set (match_operand:TF 0 "register_operand" "=d")
+ (unspec:TF [(match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %x0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrwz_2"
+ [(set (match_operand:TF 0 "register_operand" "+d")
+ (unspec:TF [(match_dup 0)
+ (match_operand:SI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRWZ))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrwz %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_fpr_from_gpr<mode>"
+ [(set (match_operand:FMOVE64X 0 "register_operand" "=ws")
+ (unspec:FMOVE64X [(match_operand:FMOVE64X 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=d"))]
+ "!TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (SImode, src);
+ rtx gpr_lo_reg = gen_lowpart (SImode, src);
+
+ emit_insn (gen_p8_mtvsrwz_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrwz_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_fmrgow_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
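As an editorial aside, the net effect of the mtvsrwz/mtvsrwz/fmrgow sequence emitted by reload_fpr_from_gpr<mode> is to rebuild a 64-bit FPR image from two 32-bit GPR halves, high word first. A hypothetical C analogue (assuming big-endian word order, as the patterns do):

#include <string.h>

double fpr_from_gpr_pair (unsigned int hi, unsigned int lo)
{
  unsigned long long image = ((unsigned long long) hi << 32) | lo;
  double d;
  memcpy (&d, &image, sizeof d);
  return d;
}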
+;; Move 128 bit values from GPRs to VSX registers in 64-bit mode
+(define_insn "p8_mtvsrd_1"
+ [(set (match_operand:TF 0 "register_operand" "=ws")
+ (unspec:TF [(match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_mtvsrd_2"
+ [(set (match_operand:TF 0 "register_operand" "+ws")
+ (unspec:TF [(match_dup 0)
+ (match_operand:DI 1 "register_operand" "r")]
+ UNSPEC_P8V_MTVSRD))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "mtvsrd %L0,%1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn "p8_xxpermdi_<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR [(match_operand:TF 1 "register_operand" "ws")]
+ UNSPEC_P8V_XXPERMDI))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "xxpermdi %x0,%1,%L1,0"
+ [(set_attr "type" "vecperm")])
+
+(define_insn_and_split "reload_vsx_from_gpr<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=wa")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:TF 2 "register_operand" "=ws"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DImode, src);
+ rtx gpr_lo_reg = gen_lowpart (DImode, src);
+
+ emit_insn (gen_p8_mtvsrd_1 (tmp, gpr_hi_reg));
+ emit_insn (gen_p8_mtvsrd_2 (tmp, gpr_lo_reg));
+ emit_insn (gen_p8_xxpermdi_<mode> (dest, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a VSX register from a GPR. Because the scalar floating point
+;; type is stored internally as double precision in the VSX registers, we have
+;; to convert it from the vector format.
+
+(define_insn_and_split "reload_vsx_from_gprsf"
+ [(set (match_operand:SF 0 "register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "r")]
+ UNSPEC_P8V_RELOAD_FROM_GPR))
+ (clobber (match_operand:DI 2 "register_operand" "=r"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx op0_di = simplify_gen_subreg (DImode, op0, SFmode, 0);
+ rtx op1_di = simplify_gen_subreg (DImode, op1, SFmode, 0);
+
+ /* Move SF value to upper 32-bits for xscvspdpn. */
+ emit_insn (gen_ashldi3 (op2, op1_di, GEN_INT (32)));
+ emit_move_insn (op0_di, op2);
+ emit_insn (gen_vsx_xscvspdpn_directmove (op0, op0));
+ DONE;
+}
+ [(set_attr "length" "8")
+ (set_attr "type" "two")])
+
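A rough C view of the trick used in reload_vsx_from_gprsf (illustrative only; my reading is that xscvspdpn expects the single-precision image in word 0 of the vector register, hence the shift into the upper half of the doubleword):

#include <string.h>

unsigned long long sf_image_to_upper_word (float f)
{
  unsigned int image;
  memcpy (&image, &f, sizeof image);
  return (unsigned long long) image << 32;   /* the ashldi3 ..., 32 step */
}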
+;; Move 128 bit values from VSX registers to GPRs in 64-bit mode by doing a
+;; normal 64-bit move, followed by an xxpermdi to get the bottom 64-bit value,
+;; and then doing a move of that.
+(define_insn "p8_mfvsrd_3_<mode>"
+ [(set (match_operand:DF 0 "register_operand" "=r")
+ (unspec:DF [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+(define_insn_and_split "reload_gpr_from_vsx<mode>"
+ [(set (match_operand:FMOVE128_GPR 0 "register_operand" "=r")
+ (unspec:FMOVE128_GPR
+ [(match_operand:FMOVE128_GPR 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:FMOVE128_GPR 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx dest = operands[0];
+ rtx src = operands[1];
+ rtx tmp = operands[2];
+ rtx gpr_hi_reg = gen_highpart (DFmode, dest);
+ rtx gpr_lo_reg = gen_lowpart (DFmode, dest);
+
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_hi_reg, src));
+ emit_insn (gen_vsx_xxpermdi_<mode> (tmp, src, src, GEN_INT (3)));
+ emit_insn (gen_p8_mfvsrd_3_<mode> (gpr_lo_reg, tmp));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+;; Move SFmode to a GPR from a VSX register. Because scalar floating point
+;; type is stored internally as double precision, we have to convert it to the
+;; vector format.
+
+(define_insn_and_split "reload_gpr_from_vsxsf"
+ [(set (match_operand:SF 0 "register_operand" "=r")
+ (unspec:SF [(match_operand:SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))
+ (clobber (match_operand:V4SF 2 "register_operand" "=wa"))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "#"
+ "&& reload_completed"
+ [(const_int 0)]
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx diop0 = simplify_gen_subreg (DImode, op0, SFmode, 0);
+
+ emit_insn (gen_vsx_xscvdpspn_scalar (op2, op1));
+ emit_insn (gen_p8_mfvsrd_4_disf (diop0, op2));
+ emit_insn (gen_lshrdi3 (diop0, diop0, GEN_INT (32)));
+ DONE;
+}
+ [(set_attr "length" "12")
+ (set_attr "type" "three")])
+
+(define_insn "p8_mfvsrd_4_disf"
+ [(set (match_operand:DI 0 "register_operand" "=r")
+ (unspec:DI [(match_operand:V4SF 1 "register_operand" "wa")]
+ UNSPEC_P8V_RELOAD_FROM_VSX))]
+ "TARGET_POWERPC64 && TARGET_DIRECT_MOVE && WORDS_BIG_ENDIAN"
+ "mfvsrd %0,%x1"
+ [(set_attr "type" "mftgpr")])
+
+
;; Next come the multi-word integer load and store and the load and store
;; multiple insns.
@@ -9467,7 +9825,8 @@
[(set (match_operand:DI 0 "gpc_reg_operand" "")
(match_operand:DI 1 "const_int_operand" ""))]
"! TARGET_POWERPC64 && reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 1))]
"
@@ -9485,13 +9844,14 @@
[(set (match_operand:DIFD 0 "rs6000_nonimmediate_operand" "")
(match_operand:DIFD 1 "input_operand" ""))]
"reload_completed && !TARGET_POWERPC64
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
(define_insn "*movdi_internal64"
- [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg")
- (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r"))]
+ [(set (match_operand:DI 0 "nonimmediate_operand" "=Y,r,r,r,r,r,?m,?*d,?*d,?Z,?wa,?wa,r,*h,*h,?wa,r,?*wg,r,?*wm")
+ (match_operand:DI 1 "input_operand" "r,Y,r,I,L,nF,d,m,d,wa,Z,wa,*h,r,0,O,*wg,r,*wm,r"))]
"TARGET_POWERPC64
&& (gpc_reg_operand (operands[0], DImode)
|| gpc_reg_operand (operands[1], DImode))"
@@ -9513,7 +9873,9 @@
nop
xxlxor %x0,%x0,%x0
mftgpr %0,%1
- mffgpr %0,%1"
+ mffgpr %0,%1
+ mfvsrd %0,%x1
+ mtvsrd %x0,%1"
[(set_attr_alternative "type"
[(if_then_else
(match_test "update_indexed_address_mem (operands[0], VOIDmode)")
@@ -9562,8 +9924,10 @@
(const_string "*")
(const_string "vecsimple")
(const_string "mftgpr")
+ (const_string "mffgpr")
+ (const_string "mftgpr")
(const_string "mffgpr")])
- (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4")])
+ (set_attr "length" "4,4,4,4,4,20,4,4,4,4,4,4,4,4,4,4,4,4,4,4")])
;; Generate all one-bits and clear left or right.
;; Use (and:DI (rotate:DI ...)) to avoid anddi3 unnecessary clobber.
@@ -9652,19 +10016,23 @@
(const_string "conditional")))])
(define_insn "*mov<mode>_ppc64"
- [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r")
- (match_operand:TI2 1 "input_operand" "r,Y,r"))]
- "(TARGET_POWERPC64
- && (<MODE>mode != TImode || VECTOR_MEM_NONE_P (TImode))
+ [(set (match_operand:TI2 0 "nonimmediate_operand" "=Y,r,r,r")
+ (match_operand:TI2 1 "input_operand" "r,Y,r,F"))]
+ "(TARGET_POWERPC64 && VECTOR_MEM_NONE_P (<MODE>mode)
&& (gpc_reg_operand (operands[0], <MODE>mode)
|| gpc_reg_operand (operands[1], <MODE>mode)))"
- "#"
- [(set_attr "type" "store,load,*")])
+{
+ return rs6000_output_move_128bit (operands);
+}
+ [(set_attr "type" "store,load,*,*")
+ (set_attr "length" "8")])
(define_split
- [(set (match_operand:TI2 0 "gpc_reg_operand" "")
+ [(set (match_operand:TI2 0 "int_reg_operand" "")
(match_operand:TI2 1 "const_double_operand" ""))]
- "TARGET_POWERPC64"
+ "TARGET_POWERPC64
+ && (VECTOR_MEM_NONE_P (<MODE>mode)
+ || (reload_completed && INT_REGNO_P (REGNO (operands[0]))))"
[(set (match_dup 2) (match_dup 4))
(set (match_dup 3) (match_dup 5))]
"
@@ -9691,7 +10059,9 @@
[(set (match_operand:TI2 0 "nonimmediate_operand" "")
(match_operand:TI2 1 "input_operand" ""))]
"reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{ rs6000_split_multireg_move (operands[0], operands[1]); DONE; })
@@ -12554,8 +12924,8 @@
(match_dup 13)]
{
REAL_VALUE_TYPE rv;
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
operands[5] = simplify_gen_subreg (DFmode, operands[1], TFmode, hi_word);
operands[6] = simplify_gen_subreg (DFmode, operands[1], TFmode, lo_word);
@@ -14788,7 +15158,7 @@
(match_operand:P 2 "gpc_reg_operand" "r")] UNSPEC_BPERM))]
"TARGET_POPCNTD"
"bpermd %0,%1,%2"
- [(set_attr "type" "integer")])
+ [(set_attr "type" "popcnt")])
;; Builtin fma support. Handle
@@ -14931,3 +15301,4 @@
(include "spe.md")
(include "dfp.md")
(include "paired.md")
+(include "crypto.md")
diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt
index 17b77629fa1..9a078198130 100644
--- a/gcc/config/rs6000/rs6000.opt
+++ b/gcc/config/rs6000/rs6000.opt
@@ -517,4 +517,28 @@ Control whether we save the TOC in the prologue for indirect calls or generate t
mvsx-timode
Target Undocumented Mask(VSX_TIMODE) Var(rs6000_isa_flags)
-; Allow/disallow TImode in VSX registers
+Allow 128-bit integers in VSX registers
+
+mpower8-fusion
+Target Report Mask(P8_FUSION) Var(rs6000_isa_flags)
+Fuse certain integer operations together for better performance on power8
+
+mpower8-fusion-sign
+Target Undocumented Mask(P8_FUSION_SIGN) Var(rs6000_isa_flags)
+Allow sign extension in fusion operations
+
+mpower8-vector
+Target Report Mask(P8_VECTOR) Var(rs6000_isa_flags)
+Use/do not use vector and scalar instructions added in ISA 2.07.
+
+mcrypto
+Target Report Mask(CRYPTO) Var(rs6000_isa_flags)
+Use ISA 2.07 crypto instructions
+
+mdirect-move
+Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags)
+Use ISA 2.07 direct move between GPR & VSX register instructions
+
+mquad-memory
+Target Report Mask(QUAD_MEMORY) Var(rs6000_isa_flags)
+Generate the quad word memory instructions (lq/stq/lqarx/stqcx).
diff --git a/gcc/config/rs6000/spe.md b/gcc/config/rs6000/spe.md
index cec2b430b82..bf10a5dc180 100644
--- a/gcc/config/rs6000/spe.md
+++ b/gcc/config/rs6000/spe.md
@@ -2604,8 +2604,8 @@
&& TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
@@ -2627,8 +2627,8 @@
&& TARGET_HARD_FLOAT && TARGET_E500_DOUBLE && TARGET_LONG_DOUBLE_128"
"
{
- const int hi_word = FLOAT_WORDS_BIG_ENDIAN ? 0 : GET_MODE_SIZE (DFmode);
- const int lo_word = FLOAT_WORDS_BIG_ENDIAN ? GET_MODE_SIZE (DFmode) : 0;
+ const int hi_word = LONG_DOUBLE_LARGE_FIRST ? 0 : GET_MODE_SIZE (DFmode);
+ const int lo_word = LONG_DOUBLE_LARGE_FIRST ? GET_MODE_SIZE (DFmode) : 0;
operands[3] = gen_reg_rtx (DFmode);
operands[4] = gen_reg_rtx (CCFPmode);
operands[5] = simplify_gen_subreg (DFmode, operands[0], TFmode, hi_word);
diff --git a/gcc/config/rs6000/sync.md b/gcc/config/rs6000/sync.md
index 252e2690a98..8616b3eca5f 100644
--- a/gcc/config/rs6000/sync.md
+++ b/gcc/config/rs6000/sync.md
@@ -18,14 +18,23 @@
;; along with GCC; see the file COPYING3. If not see
;; <http://www.gnu.org/licenses/>.
-(define_mode_attr larx [(SI "lwarx") (DI "ldarx")])
-(define_mode_attr stcx [(SI "stwcx.") (DI "stdcx.")])
+(define_mode_attr larx [(QI "lbarx")
+ (HI "lharx")
+ (SI "lwarx")
+ (DI "ldarx")
+ (TI "lqarx")])
+
+(define_mode_attr stcx [(QI "stbcx.")
+ (HI "sthcx.")
+ (SI "stwcx.")
+ (DI "stdcx.")
+ (TI "stqcx.")])
(define_code_iterator FETCHOP [plus minus ior xor and])
(define_code_attr fetchop_name
[(plus "add") (minus "sub") (ior "or") (xor "xor") (and "and")])
(define_code_attr fetchop_pred
- [(plus "add_operand") (minus "gpc_reg_operand")
+ [(plus "add_operand") (minus "int_reg_operand")
(ior "logical_operand") (xor "logical_operand") (and "and_operand")])
(define_expand "mem_thread_fence"
@@ -129,16 +138,7 @@
case MEMMODEL_CONSUME:
case MEMMODEL_ACQUIRE:
case MEMMODEL_SEQ_CST:
- if (GET_MODE (operands[0]) == QImode)
- emit_insn (gen_loadsync_qi (operands[0]));
- else if (GET_MODE (operands[0]) == HImode)
- emit_insn (gen_loadsync_hi (operands[0]));
- else if (GET_MODE (operands[0]) == SImode)
- emit_insn (gen_loadsync_si (operands[0]));
- else if (GET_MODE (operands[0]) == DImode)
- emit_insn (gen_loadsync_di (operands[0]));
- else
- gcc_unreachable ();
+ emit_insn (gen_loadsync_<mode> (operands[0]));
break;
default:
gcc_unreachable ();
@@ -170,35 +170,109 @@
DONE;
})
-;; ??? Power ISA 2.06B says that there *is* a load-{byte,half}-and-reserve
-;; opcode that is "phased-in". Not implemented as of Power7, so not yet used,
-;; but let's prepare the macros anyway.
+;; Any supported integer mode that has atomic l<x>arx/st<x>cx. instructions
+;; other than the quad memory operations, which have special restrictions.
+;; Byte/halfword atomic instructions were added in ISA 2.06B, but were phased
+;; in and did not show up until power8. TImode atomic lqarx/stqcx. require
+;; special handling due to even/odd register requirements.
+(define_mode_iterator ATOMIC [(QI "TARGET_SYNC_HI_QI")
+ (HI "TARGET_SYNC_HI_QI")
+ SI
+ (DI "TARGET_POWERPC64")])
+
+;; Types that we should provide atomic instructions for.
-(define_mode_iterator ATOMIC [SI (DI "TARGET_POWERPC64")])
+(define_mode_iterator AINT [QI
+ HI
+ SI
+ (DI "TARGET_POWERPC64")
+ (TI "TARGET_SYNC_TI")])
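These iterators are what let the ordinary atomic builtins on byte, halfword, and quadword types expand to the matching l<x>arx/st<x>cx. loops instead of the word-sized masking loop used before. A hedged usage sketch (plain user code, nothing here comes from the patch):

#include <stdatomic.h>
#include <stdbool.h>

_Atomic short counter;          /* maps onto lharx/sthcx. on power8 */

short bump (void)
{
  return atomic_fetch_add (&counter, 1);
}

/* Maps onto lqarx/stqcx. once TImode sync is enabled.  */
bool cas128 (__int128 *p, __int128 expected, __int128 desired)
{
  return __atomic_compare_exchange_n (p, &expected, desired, false,
                                      __ATOMIC_SEQ_CST, __ATOMIC_SEQ_CST);
}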
(define_insn "load_locked<mode>"
- [(set (match_operand:ATOMIC 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:ATOMIC 0 "int_reg_operand" "=r")
(unspec_volatile:ATOMIC
[(match_operand:ATOMIC 1 "memory_operand" "Z")] UNSPECV_LL))]
""
"<larx> %0,%y1"
[(set_attr "type" "load_l")])
+(define_insn "load_locked<QHI:mode>_si"
+ [(set (match_operand:SI 0 "int_reg_operand" "=r")
+ (unspec_volatile:SI
+ [(match_operand:QHI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_HI_QI"
+ "<QHI:larx> %0,%y1"
+ [(set_attr "type" "load_l")])
+
+;; Use PTImode to get even/odd register pairs
+(define_expand "load_lockedti"
+ [(use (match_operand:TI 0 "quad_int_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcx. instructions. Normal optimizations will eliminate this
+ extra copy. */
+ rtx pti = gen_reg_rtx (PTImode);
+ emit_insn (gen_load_lockedpti (pti, operands[1]));
+ emit_move_insn (operands[0], gen_lowpart (TImode, pti));
+ DONE;
+})
+
+(define_insn "load_lockedpti"
+ [(set (match_operand:PTI 0 "quad_int_reg_operand" "=&r")
+ (unspec_volatile:PTI
+ [(match_operand:TI 1 "memory_operand" "Z")] UNSPECV_LL))]
+ "TARGET_SYNC_TI
+ && !reg_mentioned_p (operands[0], operands[1])
+ && quad_int_reg_operand (operands[0], PTImode)"
+ "lqarx %0,%y1"
+ [(set_attr "type" "load_l")])
+
(define_insn "store_conditional<mode>"
[(set (match_operand:CC 0 "cc_reg_operand" "=x")
(unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
(set (match_operand:ATOMIC 1 "memory_operand" "=Z")
- (match_operand:ATOMIC 2 "gpc_reg_operand" "r"))]
+ (match_operand:ATOMIC 2 "int_reg_operand" "r"))]
""
"<stcx> %2,%y1"
[(set_attr "type" "store_c")])
+(define_expand "store_conditionalti"
+ [(use (match_operand:CC 0 "cc_reg_operand" ""))
+ (use (match_operand:TI 1 "memory_operand" ""))
+ (use (match_operand:TI 2 "quad_int_reg_operand" ""))]
+ "TARGET_SYNC_TI"
+{
+ rtx op0 = operands[0];
+ rtx op1 = operands[1];
+ rtx op2 = operands[2];
+ rtx pti_op1 = change_address (op1, PTImode, XEXP (op1, 0));
+ rtx pti_op2 = gen_reg_rtx (PTImode);
+
+ /* Use a temporary register to force getting an even register for the
+ lqarx/stqcx. instructions. Normal optimizations will eliminate this
+ extra copy. */
+ emit_move_insn (pti_op2, gen_lowpart (PTImode, op2));
+ emit_insn (gen_store_conditionalpti (op0, pti_op1, pti_op2));
+ DONE;
+})
+
+(define_insn "store_conditionalpti"
+ [(set (match_operand:CC 0 "cc_reg_operand" "=x")
+ (unspec_volatile:CC [(const_int 0)] UNSPECV_SC))
+ (set (match_operand:PTI 1 "memory_operand" "=Z")
+ (match_operand:PTI 2 "quad_int_reg_operand" "r"))]
+ "TARGET_SYNC_TI && quad_int_reg_operand (operands[2], PTImode)"
+ "stqcx. %2,%y1"
+ [(set_attr "type" "store_c")])
+
(define_expand "atomic_compare_and_swap<mode>"
- [(match_operand:SI 0 "gpc_reg_operand" "") ;; bool out
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; val out
- (match_operand:INT1 2 "memory_operand" "") ;; memory
- (match_operand:INT1 3 "reg_or_short_operand" "") ;; expected
- (match_operand:INT1 4 "gpc_reg_operand" "") ;; desired
+ [(match_operand:SI 0 "int_reg_operand" "") ;; bool out
+ (match_operand:AINT 1 "int_reg_operand" "") ;; val out
+ (match_operand:AINT 2 "memory_operand" "") ;; memory
+ (match_operand:AINT 3 "reg_or_short_operand" "") ;; expected
+ (match_operand:AINT 4 "int_reg_operand" "") ;; desired
(match_operand:SI 5 "const_int_operand" "") ;; is_weak
(match_operand:SI 6 "const_int_operand" "") ;; model succ
(match_operand:SI 7 "const_int_operand" "")] ;; model fail
@@ -209,9 +283,9 @@
})
(define_expand "atomic_exchange<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; input
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; input
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -220,9 +294,9 @@
})
(define_expand "atomic_<fetchop_name><mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 0)
- (match_operand:INT1 1 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 0)
+ (match_operand:AINT 1 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -232,8 +306,8 @@
})
(define_expand "atomic_nand<mode>"
- [(match_operand:INT1 0 "memory_operand" "") ;; memory
- (match_operand:INT1 1 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "memory_operand" "") ;; memory
+ (match_operand:AINT 1 "int_reg_operand" "") ;; operand
(match_operand:SI 2 "const_int_operand" "")] ;; model
""
{
@@ -243,10 +317,10 @@
})
(define_expand "atomic_fetch_<fetchop_name><mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -256,9 +330,9 @@
})
(define_expand "atomic_fetch_nand<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -268,10 +342,10 @@
})
(define_expand "atomic_<fetchop_name>_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (FETCHOP:INT1 (match_dup 1)
- (match_operand:INT1 2 "<fetchop_pred>" "")) ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (FETCHOP:AINT (match_dup 1)
+ (match_operand:AINT 2 "<fetchop_pred>" "")) ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
@@ -281,9 +355,9 @@
})
(define_expand "atomic_nand_fetch<mode>"
- [(match_operand:INT1 0 "gpc_reg_operand" "") ;; output
- (match_operand:INT1 1 "memory_operand" "") ;; memory
- (match_operand:INT1 2 "gpc_reg_operand" "") ;; operand
+ [(match_operand:AINT 0 "int_reg_operand" "") ;; output
+ (match_operand:AINT 1 "memory_operand" "") ;; memory
+ (match_operand:AINT 2 "int_reg_operand" "") ;; operand
(match_operand:SI 3 "const_int_operand" "")] ;; model
""
{
diff --git a/gcc/config/rs6000/t-linux b/gcc/config/rs6000/t-linux
index 017a293cde3..62a5b941389 100644
--- a/gcc/config/rs6000/t-linux
+++ b/gcc/config/rs6000/t-linux
@@ -2,7 +2,7 @@
# or soft-float.
ifeq (,$(filter $(with_cpu),$(SOFT_FLOAT_CPUS))$(findstring soft,$(with_float)))
ifneq (,$(findstring spe,$(target)))
-MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring rs6000/e500-double.h, $(tm_file_list)),,v1)
+MULTIARCH_DIRNAME = powerpc-linux-gnuspe$(if $(findstring 8548,$(with_cpu)),,v1)
else
MULTIARCH_DIRNAME = powerpc-linux-gnu
endif
diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000
index 52c18391556..5889d6d82d4 100644
--- a/gcc/config/rs6000/t-rs6000
+++ b/gcc/config/rs6000/t-rs6000
@@ -60,6 +60,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/power5.md \
$(srcdir)/config/rs6000/power6.md \
$(srcdir)/config/rs6000/power7.md \
+ $(srcdir)/config/rs6000/power8.md \
$(srcdir)/config/rs6000/cell.md \
$(srcdir)/config/rs6000/xfpu.md \
$(srcdir)/config/rs6000/a2.md \
@@ -70,6 +71,7 @@ MD_INCLUDES = $(srcdir)/config/rs6000/rs64.md \
$(srcdir)/config/rs6000/vector.md \
$(srcdir)/config/rs6000/vsx.md \
$(srcdir)/config/rs6000/altivec.md \
+ $(srcdir)/config/rs6000/crypto.md \
$(srcdir)/config/rs6000/spe.md \
$(srcdir)/config/rs6000/dfp.md \
$(srcdir)/config/rs6000/paired.md
diff --git a/gcc/config/rs6000/vector.md b/gcc/config/rs6000/vector.md
index c1d00ca2a9b..6cfebdeebdc 100644
--- a/gcc/config/rs6000/vector.md
+++ b/gcc/config/rs6000/vector.md
@@ -24,13 +24,13 @@
;; Vector int modes
-(define_mode_iterator VEC_I [V16QI V8HI V4SI])
+(define_mode_iterator VEC_I [V16QI V8HI V4SI V2DI])
;; Vector float modes
(define_mode_iterator VEC_F [V4SF V2DF])
;; Vector arithmetic modes
-(define_mode_iterator VEC_A [V16QI V8HI V4SI V4SF V2DF])
+(define_mode_iterator VEC_A [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Vector modes that need alignment via permutes
(define_mode_iterator VEC_K [V16QI V8HI V4SI V4SF])
@@ -45,7 +45,7 @@
(define_mode_iterator VEC_N [V4SI V4SF V2DI V2DF])
;; Vector comparison modes
-(define_mode_iterator VEC_C [V16QI V8HI V4SI V4SF V2DF])
+(define_mode_iterator VEC_C [V16QI V8HI V4SI V2DI V4SF V2DF])
;; Vector init/extract modes
(define_mode_iterator VEC_E [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -126,7 +126,9 @@
(match_operand:VEC_L 1 "input_operand" ""))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& reload_completed
- && gpr_or_gpr_p (operands[0], operands[1])"
+ && gpr_or_gpr_p (operands[0], operands[1])
+ && !direct_move_p (operands[0], operands[1])
+ && !quad_load_store_p (operands[0], operands[1])"
[(pc)]
{
rs6000_split_multireg_move (operands[0], operands[1]);
@@ -730,9 +732,10 @@
"")
(define_expand "and<mode>3"
- [(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" "")))]
+ [(parallel [(set (match_operand:VEC_L 0 "vlogical_operand" "")
+ (and:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
+ (match_operand:VEC_L 2 "vlogical_operand" "")))
+ (clobber (match_scratch:CC 3 ""))])]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -746,8 +749,8 @@
(define_expand "nor<mode>3"
[(set (match_operand:VEC_L 0 "vlogical_operand" "")
- (not:VEC_L (ior:VEC_L (match_operand:VEC_L 1 "vlogical_operand" "")
- (match_operand:VEC_L 2 "vlogical_operand" ""))))]
+ (and:VEC_L (not:VEC_L (match_operand:VEC_L 1 "vlogical_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "vlogical_operand" ""))))]
"VECTOR_MEM_ALTIVEC_OR_VSX_P (<MODE>mode)
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
@@ -760,6 +763,47 @@
&& (<MODE>mode != TImode || TARGET_POWERPC64)"
"")
+;; Power8 vector logical instructions.
+(define_expand "eqv<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (not:VEC_L
+ (xor:VEC_L (match_operand:VEC_L 1 "register_operand" "")
+ (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; Rewrite nand into canonical form
+(define_expand "nand<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (not:VEC_L (match_operand:VEC_L 2 "register_operand" ""))))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
+;; The canonical form is to have the negated element first, so we need to
+;; reverse arguments.
+(define_expand "orc<mode>3"
+ [(set (match_operand:VEC_L 0 "register_operand" "")
+ (ior:VEC_L
+ (not:VEC_L (match_operand:VEC_L 1 "register_operand" ""))
+ (match_operand:VEC_L 2 "register_operand" "")))]
+ "TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)
+ && (<MODE>mode != TImode || TARGET_POWERPC64)")
+
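These expanders map onto simple bitwise identities; a small C reference for the forms generated above (illustrative, not part of the patch):

unsigned int eqv_u  (unsigned int a, unsigned int b) { return ~(a ^ b); }
unsigned int nand_u (unsigned int a, unsigned int b) { return ~a | ~b; }  /* == ~(a & b) */
unsigned int orc_u  (unsigned int a, unsigned int b) { return ~a | b; }   /* negated operand first, per the RTL above */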
+;; Vector count leading zeros
+(define_expand "clz<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (clz:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+;; Vector population count
+(define_expand "popcount<mode>2"
+ [(set (match_operand:VEC_I 0 "register_operand" "")
+ (popcount:VEC_I (match_operand:VEC_I 1 "register_operand" "")))]
+ "TARGET_P8_VECTOR")
+
+
;; Same size conversions
(define_expand "float<VEC_int><mode>2"
[(set (match_operand:VEC_F 0 "vfloat_operand" "")
@@ -1074,7 +1118,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(rotate:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for arithmetic shift left on each vector element
@@ -1082,7 +1126,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(ashift:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for logical shift right on each vector element
@@ -1090,7 +1134,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(lshiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Expanders for arithmetic shift right on each vector element
@@ -1098,7 +1142,7 @@
[(set (match_operand:VEC_I 0 "vint_operand" "")
(ashiftrt:VEC_I (match_operand:VEC_I 1 "vint_operand" "")
(match_operand:VEC_I 2 "vint_operand" "")))]
- "TARGET_ALTIVEC"
+ "VECTOR_UNIT_ALTIVEC_OR_VSX_P (<MODE>mode)"
"")
;; Vector reduction expanders for VSX
diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md
index 4adf6e5ac55..b87da826a95 100644
--- a/gcc/config/rs6000/vsx.md
+++ b/gcc/config/rs6000/vsx.md
@@ -36,6 +36,10 @@
;; Iterator for logical types supported by VSX
(define_mode_iterator VSX_L [V16QI V8HI V4SI V2DI V4SF V2DF TI])
+;; Like VSX_L, but don't support TImode for doing logical instructions in
+;; 32-bit
+(define_mode_iterator VSX_L2 [V16QI V8HI V4SI V2DI V4SF V2DF])
+
;; Iterator for memory move. Handle TImode specially to allow
;; it to use gprs as well as vsx registers.
(define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF])
@@ -191,6 +195,8 @@
UNSPEC_VSX_CVDPSXWS
UNSPEC_VSX_CVDPUXWS
UNSPEC_VSX_CVSPDP
+ UNSPEC_VSX_CVSPDPN
+ UNSPEC_VSX_CVDPSPN
UNSPEC_VSX_CVSXWDP
UNSPEC_VSX_CVUXWDP
UNSPEC_VSX_CVSXDSP
@@ -207,112 +213,31 @@
;; VSX moves
(define_insn "*vsx_mov<mode>"
- [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,*Y,*r,*r,<VSr>,?wa,*r,v,wZ,v")
- (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,Y,r,j,j,j,W,v,wZ"))]
+ [(set (match_operand:VSX_M 0 "nonimmediate_operand" "=Z,<VSr>,<VSr>,?Z,?wa,?wa,wQ,?&r,??Y,??r,??r,<VSr>,?wa,*r,v,wZ, v")
+ (match_operand:VSX_M 1 "input_operand" "<VSr>,Z,<VSr>,wa,Z,wa,r,wQ,r,Y,r,j,j,j,W,v,wZ"))]
"VECTOR_MEM_VSX_P (<MODE>mode)
&& (register_operand (operands[0], <MODE>mode)
|| register_operand (operands[1], <MODE>mode))"
{
- switch (which_alternative)
- {
- case 0:
- case 3:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stx<VSm>x %x1,%y0";
-
- case 1:
- case 4:
- gcc_assert (MEM_P (operands[1])
- && GET_CODE (XEXP (operands[1], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[1], 0)) != PRE_MODIFY);
- return "lx<VSm>x %x0,%y1";
-
- case 2:
- case 5:
- return "xxlor %x0,%x1,%x1";
-
- case 6:
- case 7:
- case 8:
- case 11:
- return "#";
-
- case 9:
- case 10:
- return "xxlxor %x0,%x0,%x0";
-
- case 12:
- return output_vec_const_move (operands);
-
- case 13:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "stvx %1,%y0";
-
- case 14:
- gcc_assert (MEM_P (operands[0])
- && GET_CODE (XEXP (operands[0], 0)) != PRE_INC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_DEC
- && GET_CODE (XEXP (operands[0], 0)) != PRE_MODIFY);
- return "lvx %0,%y1";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,*,*,*,vecsimple,vecsimple,*,*,vecstore,vecload")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecstore,vecload,vecsimple,load,store,store,load, *,vecsimple,vecsimple,*, *,vecstore,vecload")
+ (set_attr "length" "4,4,4,4,4,4,12,12,12,12,16,4,4,*,16,4,4")])
;; Unlike other VSX moves, allow the GPRs even for reloading, since a normal
;; use of TImode is for unions. However for plain data movement, slightly
;; favor the vector loads
(define_insn "*vsx_movti_64bit"
- [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,?Y,?r,?r,?r")
- (match_operand:TI 1 "input_operand" "wa, Z,wa, O,W,wZ, v, r, Y, r, n"))]
+ [(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v,v,wZ,wQ,&r,Y,r,r,?r")
+ (match_operand:TI 1 "input_operand" "wa,Z,wa,O,W,wZ,v,r,wQ,r,Y,r,n"))]
"TARGET_POWERPC64 && VECTOR_MEM_VSX_P (TImode)
&& (register_operand (operands[0], TImode)
|| register_operand (operands[1], TImode))"
{
- switch (which_alternative)
- {
- case 0:
- return "stxvd2x %x1,%y0";
-
- case 1:
- return "lxvd2x %x0,%y1";
-
- case 2:
- return "xxlor %x0,%x1,%x1";
-
- case 3:
- return "xxlxor %x0,%x0,%x0";
-
- case 4:
- return output_vec_const_move (operands);
-
- case 5:
- return "stvx %1,%y0";
-
- case 6:
- return "lvx %0,%y1";
-
- case 7:
- case 8:
- case 9:
- case 10:
- return "#";
-
- default:
- gcc_unreachable ();
- }
+ return rs6000_output_move_128bit (operands);
}
- [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,*,*,*,*")
- (set_attr "length" " 4, 4, 4, 4, 8, 4, 4,8,8,8,8")])
+ [(set_attr "type" "vecstore,vecload,vecsimple,vecsimple,vecsimple,vecstore,vecload,store,load,store,load,*,*")
+ (set_attr "length" "4,4,4,4,16,4,4,8,8,8,8,8,8")])
(define_insn "*vsx_movti_32bit"
[(set (match_operand:TI 0 "nonimmediate_operand" "=Z,wa,wa,wa,v, v,wZ,Q,Y,????r,????r,????r,r")
@@ -1003,6 +928,40 @@
"xscvspdp %x0,%x1"
[(set_attr "type" "fp")])
+;; ISA 2.07 xscvdpspn/xscvspdpn that does not raise an error on signalling NaNs
+(define_insn "vsx_xscvdpspn"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:V4SF [(match_operand:DF 1 "vsx_register_operand" "wd,wa")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvspdpn"
+ [(set (match_operand:DF 0 "vsx_register_operand" "=ws,?wa")
+ (unspec:DF [(match_operand:V4SF 1 "vsx_register_operand" "wa,wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+(define_insn "vsx_xscvdpspn_scalar"
+ [(set (match_operand:V4SF 0 "vsx_register_operand" "=wa")
+ (unspec:V4SF [(match_operand:SF 1 "vsx_register_operand" "f")]
+ UNSPEC_VSX_CVDPSPN))]
+ "TARGET_XSCVDPSPN"
+ "xscvdpspn %x0,%x1"
+ [(set_attr "type" "fp")])
+
+;; Used by direct move to move an SFmode value from GPR to VSX register
+(define_insn "vsx_xscvspdpn_directmove"
+ [(set (match_operand:SF 0 "vsx_register_operand" "=wa")
+ (unspec:SF [(match_operand:SF 1 "vsx_register_operand" "wa")]
+ UNSPEC_VSX_CVSPDPN))]
+ "TARGET_XSCVSPDPN"
+ "xscvspdpn %x0,%x1"
+ [(set_attr "type" "fp")])
+
;; Convert from 64-bit to 32-bit types
;; Note, favor the Altivec registers since the usual use of these instructions
;; is in vector converts and we need to use the Altivec vperm instruction.
@@ -1088,70 +1047,368 @@
(set_attr "fp_type" "<VSfptype_simple>")])
-;; Logical operations
-;; Do not support TImode logical instructions on 32-bit at present, because the
-;; compiler will see that we have a TImode and when it wanted DImode, and
-;; convert the DImode to TImode, store it on the stack, and load it in a VSX
-;; register.
-(define_insn "*vsx_and<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (and:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+;; Logical operations. Do not support TImode logical instructions on 32-bit at
+;; present, because the compiler will see that we have a TImode when it
+;; wanted DImode, convert the DImode to TImode, store it on the stack, and
+;; load it in a VSX register or generate extra logical instructions in GPR
+;; registers.
+
+;; When we are splitting the operations to GPRs, we use three alternatives, two
+;; where the first/second inputs and output are in the same register, and the
+;; third where the output specifies an early clobber so that we don't have to
+;; worry about overlapping registers.
+
+(define_insn "*vsx_and<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))
+ (clobber (match_scratch:CC 3 "X"))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxland %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_ior<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (ior:VSX_L (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_and<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))
+ (clobber (match_scratch:CC 3 "X,X,X,X"))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxland %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(parallel [(set (match_dup 4) (and:DI (match_dup 5) (match_dup 6)))
+ (clobber (match_dup 3))])
+ (parallel [(set (match_dup 7) (and:DI (match_dup 8) (match_dup 9)))
+ (clobber (match_dup 3))])]
+{
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[7] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[9] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
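The GPR alternatives of these 128-bit logical patterns are split into two 64-bit operations on the high and low subregs. A small C analogue of what the AND splitter produces (hypothetical, for illustration):

typedef struct { unsigned long long hi, lo; } u128_parts;

u128_parts and128 (u128_parts a, u128_parts b)
{
  u128_parts r = { a.hi & b.hi, a.lo & b.lo };   /* two DImode ANDs */
  return r;
}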
+(define_insn "*vsx_ior<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_ior<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (ior:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(const_int 0)]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
-(define_insn "*vsx_xor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (xor:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+ if (operands[5] == constm1_rtx)
+ emit_move_insn (operands[3], constm1_rtx);
+
+ else if (operands[5] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[3], operands[4]))
+ emit_move_insn (operands[3], operands[4]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[3], operands[4], operands[5]));
+
+ if (operands[8] == constm1_rtx)
+ emit_move_insn (operands[6], constm1_rtx);
+
+ else if (operands[8] == const0_rtx)
+ {
+ if (!rtx_equal_p (operands[6], operands[7]))
+ emit_move_insn (operands[6], operands[7]);
+ }
+ else
+ emit_insn (gen_iordi3 (operands[6], operands[7], operands[8]));
+ DONE;
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_xor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "%wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "VECTOR_MEM_VSX_P (<MODE>mode) && !TARGET_POWERPC64"
"xxlxor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_one_cmpl<mode>2"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn_and_split "*vsx_xor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r,?r,&?r")
+ (xor:VSX_L
+ (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r,0,r")
+ (match_operand:VSX_L 2 "vsx_reg_or_cint_operand" "wa,r,0,r,n,n")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlxor %x0,%x1,%x2
+ #
+ #
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (xor:DI (match_dup 4) (match_dup 5)))
+ (set (match_dup 6) (xor:DI (match_dup 7) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two,three,three")
+ (set_attr "length" "4,8,8,8,16,16")])
+
+(define_insn "*vsx_one_cmpl<mode>2_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x1"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_one_cmpl<mode>2_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,&?r")
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x1
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 2) (not:DI (match_dup 3)))
+ (set (match_dup 4) (not:DI (match_dup 5)))]
+{
+ operands[2] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[3] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[5] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two")
+ (set_attr "length" "4,8,8")])
-(define_insn "*vsx_nor<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
- (not:VSX_L
- (ior:VSX_L
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
+(define_insn "*vsx_nor<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L 1 "vlogical_operand" "%wa"))
+ (not:VSX_L2 (match_operand:VSX_L 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
"xxlnor %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nor<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,&?r")
+ (and:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "%wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnor %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+(define_insn "*vsx_andc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (and:VSX_L2
+ (not:VSX_L2
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 1 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlandc %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
-(define_insn "*vsx_andc<mode>3"
- [(set (match_operand:VSX_L 0 "vsx_register_operand" "=<VSr>,?wa")
+(define_insn_and_split "*vsx_andc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
(and:VSX_L
(not:VSX_L
- (match_operand:VSX_L 2 "vsx_register_operand" "<VSr>,?wa"))
- (match_operand:VSX_L 1 "vsx_register_operand" "<VSr>,?wa")))]
- "VECTOR_MEM_VSX_P (<MODE>mode)
- && (<MODE>mode != TImode || TARGET_POWERPC64)"
- "xxlandc %x0,%x1,%x2"
- [(set_attr "type" "vecsimple")])
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 1 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlandc %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (and:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (and:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Power8 vector logical instructions.
+(define_insn "*vsx_eqv<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (not:VSX_L2
+ (xor:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa")
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxleqv %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_eqv<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (not:VSX_L
+ (xor:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r")
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxleqv %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (not:DI (xor:DI (match_dup 4) (match_dup 5))))
+ (set (match_dup 6) (not:DI (xor:DI (match_dup 7) (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Rewrite nand into canonical form
+(define_insn "*vsx_nand<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (not:VSX_L2 (match_operand:VSX_L2 2 "vlogical_operand" "wa"))))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlnand %x0,%x1,%x2"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_nand<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "register_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "register_operand" "wa,0,r,r"))
+ (not:VSX_L (match_operand:VSX_L 2 "register_operand" "wa,r,0,r"))))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlnand %x0,%x1,%x2
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (not:DI (match_dup 5))))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (not:DI (match_dup 8))))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
+
+;; Rewrite or complement into canonical form, by reversing the arguments
+(define_insn "*vsx_orc<mode>3_32bit"
+ [(set (match_operand:VSX_L2 0 "vlogical_operand" "=wa")
+ (ior:VSX_L2
+ (not:VSX_L2 (match_operand:VSX_L2 1 "vlogical_operand" "wa"))
+ (match_operand:VSX_L2 2 "vlogical_operand" "wa")))]
+ "!TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "xxlorc %x0,%x2,%x1"
+ [(set_attr "type" "vecsimple")
+ (set_attr "length" "4")])
+
+(define_insn_and_split "*vsx_orc<mode>3_64bit"
+ [(set (match_operand:VSX_L 0 "vlogical_operand" "=wa,?r,?r,?r")
+ (ior:VSX_L
+ (not:VSX_L (match_operand:VSX_L 1 "vlogical_operand" "wa,0,r,r"))
+ (match_operand:VSX_L 2 "vlogical_operand" "wa,r,0,r")))]
+ "TARGET_POWERPC64 && TARGET_P8_VECTOR && VECTOR_MEM_VSX_P (<MODE>mode)"
+ "@
+ xxlorc %x0,%x2,%x1
+ #
+ #
+ #"
+ "reload_completed && TARGET_POWERPC64 && TARGET_P8_VECTOR
+ && VECTOR_MEM_VSX_P (<MODE>mode)
+ && int_reg_operand (operands[0], <MODE>mode)"
+ [(set (match_dup 3) (ior:DI (not:DI (match_dup 4)) (match_dup 5)))
+ (set (match_dup 6) (ior:DI (not:DI (match_dup 7)) (match_dup 8)))]
+{
+ operands[3] = simplify_subreg (DImode, operands[0], <MODE>mode, 0);
+ operands[4] = simplify_subreg (DImode, operands[1], <MODE>mode, 0);
+ operands[5] = simplify_subreg (DImode, operands[2], <MODE>mode, 0);
+ operands[6] = simplify_subreg (DImode, operands[0], <MODE>mode, 8);
+ operands[7] = simplify_subreg (DImode, operands[1], <MODE>mode, 8);
+ operands[8] = simplify_subreg (DImode, operands[2], <MODE>mode, 8);
+}
+ [(set_attr "type" "vecsimple,two,two,two")
+ (set_attr "length" "4,8,8,8")])
;; Permute operations
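When one of these 128-bit logical operations ends up in general-purpose registers rather than a VSX register, the *_64bit splitters above break it into two independent DImode operations on the high and low halves (the simplify_subreg calls with offsets 0 and 8). A minimal C sketch of the semantics those splits implement (the struct and helper names are illustrative only):

#include <stdint.h>

/* A 128-bit vector modelled as two 64-bit halves, matching the
   subreg offsets 0 and 8 used by the splitters.  */
struct v128 { uint64_t hi, lo; };

static struct v128 v128_eqv (struct v128 a, struct v128 b)   /* xxleqv  */
{ return (struct v128) { ~(a.hi ^ b.hi), ~(a.lo ^ b.lo) }; }

static struct v128 v128_nand (struct v128 a, struct v128 b)  /* xxlnand */
{ return (struct v128) { ~a.hi | ~b.hi, ~a.lo | ~b.lo }; }

static struct v128 v128_orc (struct v128 a, struct v128 b)   /* xxlorc  */
{ return (struct v128) { ~a.hi | b.hi, ~a.lo | b.lo }; }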
diff --git a/gcc/config/rx/rx-opts.h b/gcc/config/rx/rx-opts.h
index f00de76a901..4d5455e8d8d 100644
--- a/gcc/config/rx/rx-opts.h
+++ b/gcc/config/rx/rx-opts.h
@@ -24,7 +24,8 @@ enum rx_cpu_types
{
RX600,
RX610,
- RX200
+ RX200,
+ RX100
};
#endif
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index 15d5359ea1d..d781bb73d28 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -975,6 +975,8 @@ rx_gen_move_template (rtx * operands, bool is_movu)
loading an immediate into a register. */
extension = ".W";
break;
+ case DFmode:
+ case DImode:
case SFmode:
case SImode:
extension = ".L";
@@ -988,19 +990,44 @@ rx_gen_move_template (rtx * operands, bool is_movu)
}
if (MEM_P (src) && rx_pid_data_operand (XEXP (src, 0)) == PID_UNENCODED)
- src_template = "(%A1-__pid_base)[%P1]";
+ {
+ gcc_assert (GET_MODE (src) != DImode);
+ gcc_assert (GET_MODE (src) != DFmode);
+
+ src_template = "(%A1 - __pid_base)[%P1]";
+ }
else if (MEM_P (src) && rx_small_data_operand (XEXP (src, 0)))
- src_template = "%%gp(%A1)[%G1]";
+ {
+ gcc_assert (GET_MODE (src) != DImode);
+ gcc_assert (GET_MODE (src) != DFmode);
+
+ src_template = "%%gp(%A1)[%G1]";
+ }
else
src_template = "%1";
if (MEM_P (dest) && rx_small_data_operand (XEXP (dest, 0)))
- dst_template = "%%gp(%A0)[%G0]";
+ {
+ gcc_assert (GET_MODE (dest) != DImode);
+ gcc_assert (GET_MODE (dest) != DFmode);
+
+ dst_template = "%%gp(%A0)[%G0]";
+ }
else
dst_template = "%0";
- sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov",
- extension, src_template, dst_template);
+ if (GET_MODE (dest) == DImode || GET_MODE (dest) == DFmode)
+ {
+ gcc_assert (! is_movu);
+
+ if (REG_P (src) && REG_P (dest) && (REGNO (dest) == REGNO (src) + 1))
+ strcpy (out_template, "mov.L\t%H1, %H0 | mov.L\t%1, %0");
+ else
+ strcpy (out_template, "mov.L\t%1, %0 | mov.L\t%H1, %H0");
+ }
+ else
+ sprintf (out_template, "%s%s\t%s, %s", is_movu ? "movu" : "mov",
+ extension, src_template, dst_template);
return out_template;
}
@@ -3240,6 +3267,12 @@ rx_ok_to_inline (tree caller, tree callee)
|| lookup_attribute ("gnu_inline", DECL_ATTRIBUTES (callee)) != NULL_TREE;
}
+static bool
+rx_enable_lra (void)
+{
+ return TARGET_ENABLE_LRA || 1;
+}
+
#undef TARGET_NARROW_VOLATILE_BITFIELD
#define TARGET_NARROW_VOLATILE_BITFIELD rx_narrow_volatile_bitfield
@@ -3391,6 +3424,9 @@ rx_ok_to_inline (tree caller, tree callee)
#undef TARGET_WARN_FUNC_RETURN
#define TARGET_WARN_FUNC_RETURN rx_warn_func_return
+#undef TARGET_LRA_P
+#define TARGET_LRA_P rx_enable_lra
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-rx.h"
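For DImode and DFmode values, rx_gen_move_template above now emits a pair of mov.L instructions and picks the order of the two halves so that a register-to-register move cannot clobber a source half before it has been read. A small C sketch of that ordering decision; the emit_move32 helper and register-pair layout are assumptions for illustration:

#include <stdio.h>

/* Hypothetical helper: emit one 32-bit register-to-register move
   in RX syntax (source first, destination second).  */
static void emit_move32 (int dest, int src)
{
  printf ("mov.L\tr%d, r%d\n", src, dest);
}

/* Copy the 64-bit value in the pair (src, src+1) to (dest, dest+1).
   As in rx_gen_move_template, when dest == src + 1 the upper half
   must be moved first; otherwise the lower half goes first.  */
static void move64 (int dest, int src)
{
  if (dest == src + 1)
    {
      emit_move32 (dest + 1, src + 1);
      emit_move32 (dest, src);
    }
  else
    {
      emit_move32 (dest, src);
      emit_move32 (dest + 1, src + 1);
    }
}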
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
index 092fd7659a2..72aee2fe214 100644
--- a/gcc/config/rx/rx.h
+++ b/gcc/config/rx/rx.h
@@ -29,9 +29,22 @@
builtin_define ("__RX610__"); \
builtin_assert ("machine=RX610"); \
} \
- else \
- builtin_assert ("machine=RX600"); \
- \
+ else if (rx_cpu_type == RX100) \
+ { \
+ builtin_define ("__RX100__"); \
+ builtin_assert ("machine=RX100"); \
+ } \
+ else if (rx_cpu_type == RX200) \
+ { \
+ builtin_define ("__RX200__"); \
+ builtin_assert ("machine=RX200"); \
+ } \
+ else if (rx_cpu_type == RX600) \
+ { \
+ builtin_define ("__RX600__"); \
+ builtin_assert ("machine=RX600"); \
+ } \
+ \
if (TARGET_BIG_ENDIAN_DATA) \
builtin_define ("__RX_BIG_ENDIAN__"); \
else \
@@ -60,6 +73,7 @@
#undef CC1_SPEC
#define CC1_SPEC "\
%{mas100-syntax:%{gdwarf*:%e-mas100-syntax is incompatible with -gdwarf}} \
+ %{mcpu=rx100:%{fpu:%erx100 cpu does not have FPU hardware}} \
%{mcpu=rx200:%{fpu:%erx200 cpu does not have FPU hardware}}"
#undef STARTFILE_SPEC
diff --git a/gcc/config/rx/rx.md b/gcc/config/rx/rx.md
index 3a95567a43f..692b7d220a3 100644
--- a/gcc/config/rx/rx.md
+++ b/gcc/config/rx/rx.md
@@ -30,7 +30,7 @@
;; then all operations on doubles have to be handled by
;; library functions.
(define_mode_iterator register_modes
- [(SF "ALLOW_RX_FPU_INSNS") (SI "") (HI "") (QI "")])
+ [(SF "") (SI "") (HI "") (QI "")])
(define_constants
[
@@ -2621,3 +2621,21 @@
""
""
)
+
+(define_insn "movdi"
+ [(set:DI (match_operand:DI 0 "nonimmediate_operand" "=rm")
+ (match_operand:DI 1 "general_operand" "rmi"))]
+ "TARGET_ENABLE_LRA || 1"
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "16")
+ (set_attr "timings" "22")]
+)
+
+(define_insn "movdf"
+ [(set:DF (match_operand:DF 0 "nonimmediate_operand" "=rm")
+ (match_operand:DF 1 "general_operand" "rmi"))]
+ "TARGET_ENABLE_LRA || 1"
+ { return rx_gen_move_template (operands, false); }
+ [(set_attr "length" "16")
+ (set_attr "timings" "22")]
+)
diff --git a/gcc/config/rx/rx.opt b/gcc/config/rx/rx.opt
index 09d93c3e5f1..12312cfef6b 100644
--- a/gcc/config/rx/rx.opt
+++ b/gcc/config/rx/rx.opt
@@ -61,6 +61,9 @@ Enum(rx_cpu_types) String(rx200) Value(RX200)
EnumValue
Enum(rx_cpu_types) String(rx600) Value(RX600)
+EnumValue
+Enum(rx_cpu_types) String(rx100) Value(RX100)
+
;---------------------------------------------------
mbig-endian-data
@@ -132,3 +135,7 @@ Enable the use of the old, broken, ABI where all stacked function arguments are
mrx-abi
Target RejectNegative Report InverseMask(GCC_ABI)
Enable the use the standard RX ABI where all stacked function arguments are naturally aligned. This is the default.
+
+mlra
+Target Report Mask(ENABLE_LRA)
+Enable the use of the LRA register allocator.
diff --git a/gcc/config/rx/t-rx b/gcc/config/rx/t-rx
index 97079859240..41a3d3a98dc 100644
--- a/gcc/config/rx/t-rx
+++ b/gcc/config/rx/t-rx
@@ -28,7 +28,7 @@ MULTILIB_DIRNAMES = 64-bit-double no-fpu-libs big-endian-data pid
# MULTILIB_OPTIONS += mgcc-abi
# MULTILIB_DIRNAMES += gcc-abi
-MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200
+MULTILIB_MATCHES = nofpu=mnofpu nofpu=mcpu?rx200 nofpu=mcpu?rx100
MULTILIB_EXCEPTIONS =
MULTILIB_EXTRA_OPTS =
diff --git a/gcc/config/s390/htmintrin.h b/gcc/config/s390/htmintrin.h
new file mode 100644
index 00000000000..7aaa9f5bf7c
--- /dev/null
+++ b/gcc/config/s390/htmintrin.h
@@ -0,0 +1,57 @@
+/* GNU compiler hardware transactional execution intrinsics
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _HTMINTRIN_H
+#define _HTMINTRIN_H
+
+
+/* Condition codes generated by tbegin */
+#define _HTM_TBEGIN_STARTED 0
+#define _HTM_TBEGIN_INDETERMINATE 1
+#define _HTM_TBEGIN_TRANSIENT 2
+#define _HTM_TBEGIN_PERSISTENT 3
+
+/* The abort codes below this threshold are reserved for machine
+ use. */
+#define _HTM_FIRST_USER_ABORT_CODE 256
+
+/* The transaction diagnostic block as it is defined in the Principles
+ of Operation, chapter 5-91. */
+
+struct __htm_tdb {
+ unsigned char format; /* 0 */
+ unsigned char flags;
+ unsigned char reserved1[4];
+ unsigned short nesting_depth;
+ unsigned long long abort_code; /* 8 */
+ unsigned long long conflict_token; /* 16 */
+ unsigned long long atia; /* 24 */
+ unsigned char eaid; /* 32 */
+ unsigned char dxc;
+ unsigned char reserved2[2];
+ unsigned int program_int_id;
+ unsigned long long exception_id; /* 40 */
+ unsigned long long bea; /* 48 */
+ unsigned char reserved3[72]; /* 56 */
+ unsigned long long gprs[16]; /* 128 */
+} __attribute__((__packed__, __aligned__ (8)));
+
+
+#endif /* _HTMINTRIN_H */
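A brief usage sketch of the condition codes defined above, retrying while tbegin reports a transient failure; it assumes the __builtin_tbegin/__builtin_tend builtins added for s390 later in this patch:

#include <htmintrin.h>

static int run_transaction (int *shared)
{
  int cc;

  /* Retry as long as the abort is classified as transient (CC2).  */
  do
    cc = __builtin_tbegin ((void *) 0);
  while (cc == _HTM_TBEGIN_TRANSIENT);

  if (cc != _HTM_TBEGIN_STARTED)
    return -1;                  /* indeterminate or persistent: fall back */

  ++*shared;                    /* transactional update */
  return __builtin_tend ();
}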
diff --git a/gcc/config/s390/htmxlintrin.h b/gcc/config/s390/htmxlintrin.h
new file mode 100644
index 00000000000..bb142195b2b
--- /dev/null
+++ b/gcc/config/s390/htmxlintrin.h
@@ -0,0 +1,182 @@
+/* XL compiler hardware transactional execution intrinsics
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _HTMXLINTRIN_H
+#define _HTMXLINTRIN_H
+
+#include <stdint.h>
+
+#include <htmintrin.h>
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* These intrinsics are being made available for compatibility with
+ the IBM XL compiler. For documentation please see the "z/OS XL
+ C/C++ Programming Guide" publicly available on the web. */
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_simple_begin ()
+{
+ return __builtin_tbegin_nofloat (0);
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_begin (void* const tdb)
+{
+ return __builtin_tbegin_nofloat (tdb);
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_end ()
+{
+ return __builtin_tend ();
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_abort ()
+{
+ return __builtin_tabort (_HTM_FIRST_USER_ABORT_CODE);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_named_abort (unsigned char const code)
+{
+ return __builtin_tabort ((int)_HTM_FIRST_USER_ABORT_CODE + code);
+}
+
+extern __inline void __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_non_transactional_store (void* const addr, long long const value)
+{
+ __builtin_non_tx_store ((uint64_t*)addr, (uint64_t)value);
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_nesting_depth (void* const tdb_ptr)
+{
+ int depth = __builtin_tx_nesting_depth ();
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ if (depth != 0)
+ return depth;
+
+ if (tdb->format == 0)
+ return 0;
+ return tdb->nesting_depth;
+}
+
+/* Transaction failure diagnostics */
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_user_abort (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ if (tdb->format == 0)
+ return 0;
+
+ return !!(tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE);
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_named_user_abort (void* const tdb_ptr, unsigned char* code)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ if (tdb->format == 0)
+ return 0;
+
+ if (tdb->abort_code >= _HTM_FIRST_USER_ABORT_CODE)
+ {
+ *code = tdb->abort_code - _HTM_FIRST_USER_ABORT_CODE;
+ return 1;
+ }
+ return 0;
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_illegal (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ return (tdb->format == 0
+ && (tdb->abort_code == 4 /* unfiltered program interruption */
+ || tdb->abort_code == 11 /* restricted instruction */));
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_footprint_exceeded (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ return (tdb->format == 0
+ && (tdb->abort_code == 7 /* fetch overflow */
+ || tdb->abort_code == 8 /* store overflow */));
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_nested_too_deep (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ return tdb->format == 0 && tdb->abort_code == 13; /* depth exceeded */
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_conflict (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ return (tdb->format == 0
+ && (tdb->abort_code == 9 /* fetch conflict */
+ || tdb->abort_code == 10 /* store conflict */));
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_is_failure_persistent (long const result)
+{
+ return result == _HTM_TBEGIN_PERSISTENT;
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_failure_address (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+#ifdef __s390x__
+ return tdb->atia;
+#else
+ return tdb->atia & 0xffffffff;
+#endif
+}
+
+extern __inline long __attribute__((__gnu_inline__, __always_inline__, __artificial__))
+__TM_failure_code (void* const tdb_ptr)
+{
+ struct __htm_tdb *tdb = (struct __htm_tdb*)tdb_ptr;
+
+ return tdb->abort_code;
+}
+
+#ifdef __cplusplus
+}
+#endif
+
+#endif /* _HTMXLINTRIN_H */
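A minimal usage sketch of these XL-style wrappers, with a caller-provided diagnostic block; names other than the intrinsics themselves are illustrative:

#include <htmxlintrin.h>

static long tx_increment (long *counter)
{
  struct __htm_tdb tdb;
  long rc;

  for (;;)
    {
      rc = __TM_begin (&tdb);
      if (rc == _HTM_TBEGIN_STARTED)
        break;
      if (__TM_is_failure_persistent (rc))
        return -1;              /* give up; caller takes a fallback path */
    }

  ++*counter;                   /* transactional update */
  __TM_end ();
  return 0;
}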
diff --git a/gcc/config/s390/predicates.md b/gcc/config/s390/predicates.md
index 523326e177d..069b42489a7 100644
--- a/gcc/config/s390/predicates.md
+++ b/gcc/config/s390/predicates.md
@@ -176,7 +176,11 @@
{
if (GET_CODE (XEXP (op, 0)) != REG
|| REGNO (XEXP (op, 0)) != CC_REGNUM
- || XEXP (op, 1) != const0_rtx)
+ || (XEXP (op, 1) != const0_rtx
+ && !(CONST_INT_P (XEXP (op, 1))
+ && GET_MODE (XEXP (op, 0)) == CCRAWmode
+ && INTVAL (XEXP (op, 1)) >= 0
+ && INTVAL (XEXP (op, 1)) <= 15)))
return false;
return (s390_branch_condition_mask (op) >= 0);
@@ -224,7 +228,11 @@
if (GET_CODE (XEXP (op, 0)) != REG
|| REGNO (XEXP (op, 0)) != CC_REGNUM
- || XEXP (op, 1) != const0_rtx)
+ || (XEXP (op, 1) != const0_rtx
+ && !(CONST_INT_P (XEXP (op, 1))
+ && GET_MODE (XEXP (op, 0)) == CCRAWmode
+ && INTVAL (XEXP (op, 1)) >= 0
+ && INTVAL (XEXP (op, 1)) <= 15)))
return false;
switch (GET_MODE (XEXP (op, 0)))
diff --git a/gcc/config/s390/s390-modes.def b/gcc/config/s390/s390-modes.def
index 419108fb473..5e0b50cafa1 100644
--- a/gcc/config/s390/s390-modes.def
+++ b/gcc/config/s390/s390-modes.def
@@ -152,6 +152,14 @@ The compare and swap instructions sets the condition code to 0/1 if the
operands were equal/unequal. The CCZ1 mode ensures the result can be
effectively placed into a register.
+CCRAW
+
+The CC mode generated by a non-compare instruction. The condition
+code mask for the CC consumer is determined by the comparison operator
+(only EQ and NE are allowed) and the immediate value given as the
+second operand of the operator. For the other CC modes this second
+operand is always 0.
+
*/
@@ -172,3 +180,4 @@ CC_MODE (CCT);
CC_MODE (CCT1);
CC_MODE (CCT2);
CC_MODE (CCT3);
+CC_MODE (CCRAW);
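For CCRAWmode the branch mask is therefore taken straight from the comparison: EQ against an immediate uses that immediate as the 4-bit mask, NE uses its complement, and s390_canonicalize_comparison turns a plain CC value into such a mask via 1 << (3 - cc). A short C sketch of both steps (the enum is only for illustration):

/* Map a raw condition-code value 0..3 to its one-bit branch mask,
   as done when canonicalizing a comparison of UNSPEC_CC_TO_INT.  */
static unsigned int
cc_value_to_mask (unsigned int cc)
{
  return 1u << (3 - cc);        /* cc0 -> 8, cc1 -> 4, cc2 -> 2, cc3 -> 1 */
}

/* Compute the final branch condition mask for CCRAWmode, mirroring
   the new CCRAWmode case in s390_branch_condition_mask.  */
enum raw_cmp { RAW_EQ, RAW_NE };

static unsigned int
ccraw_branch_mask (enum raw_cmp code, unsigned int mask_operand)
{
  return code == RAW_EQ ? mask_operand : mask_operand ^ 0xf;
}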
diff --git a/gcc/config/s390/s390-protos.h b/gcc/config/s390/s390-protos.h
index 1a8205359e4..67283df4553 100644
--- a/gcc/config/s390/s390-protos.h
+++ b/gcc/config/s390/s390-protos.h
@@ -58,7 +58,7 @@ extern bool s390_match_ccmode (rtx, enum machine_mode);
extern enum machine_mode s390_tm_ccmode (rtx, rtx, bool);
extern enum machine_mode s390_select_ccmode (enum rtx_code, rtx, rtx);
extern rtx s390_emit_compare (enum rtx_code, rtx, rtx);
-extern void s390_emit_jump (rtx, rtx);
+extern rtx s390_emit_jump (rtx, rtx);
extern bool symbolic_reference_mentioned_p (rtx);
extern bool tls_symbolic_reference_mentioned_p (rtx);
extern bool legitimate_la_operand_p (rtx);
@@ -87,6 +87,7 @@ extern void s390_expand_cs_hqi (enum machine_mode, rtx, rtx, rtx,
rtx, rtx, bool);
extern void s390_expand_atomic (enum machine_mode, enum rtx_code,
rtx, rtx, rtx, bool);
+extern void s390_expand_tbegin (rtx, rtx, rtx, bool);
extern rtx s390_return_addr_rtx (int, rtx);
extern rtx s390_back_chain_rtx (void);
extern rtx s390_emit_call (rtx, rtx, rtx, rtx);
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 30c34901f8d..2cacf6f52ad 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -367,6 +367,10 @@ struct GTY(()) machine_function
const char *some_ld_name;
bool has_landing_pad_p;
+
+ /* True if the current function may contain a tbegin clobbering
+ FPRs. */
+ bool tbegin_p;
};
/* Few accessor macros for struct cfun->machine->s390_frame_layout. */
@@ -824,9 +828,9 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
*op1 = constm1_rtx;
}
- /* Remove redundant UNSPEC_CCU_TO_INT conversions if possible. */
+ /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
- && XINT (*op0, 1) == UNSPEC_CCU_TO_INT
+ && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
&& XVECLEN (*op0, 0) == 1
&& GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
@@ -852,25 +856,35 @@ s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
}
}
- /* Remove redundant UNSPEC_CCZ_TO_INT conversions if possible. */
+ /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
if (GET_CODE (*op0) == UNSPEC
- && XINT (*op0, 1) == UNSPEC_CCZ_TO_INT
+ && XINT (*op0, 1) == UNSPEC_CC_TO_INT
&& XVECLEN (*op0, 0) == 1
- && GET_MODE (XVECEXP (*op0, 0, 0)) == CCZmode
&& GET_CODE (XVECEXP (*op0, 0, 0)) == REG
&& REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
- && *op1 == const0_rtx)
+ && CONST_INT_P (*op1))
{
enum rtx_code new_code = UNKNOWN;
- switch (*code)
+ switch (GET_MODE (XVECEXP (*op0, 0, 0)))
{
- case EQ: new_code = EQ; break;
- case NE: new_code = NE; break;
- default: break;
+ case CCZmode:
+ case CCRAWmode:
+ switch (*code)
+ {
+ case EQ: new_code = EQ; break;
+ case NE: new_code = NE; break;
+ default: break;
+ }
+ break;
+ default: break;
}
if (new_code != UNKNOWN)
{
+ /* For CCRAWmode put the required cc mask into the second
+ operand. */
+ if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode)
+ *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
*op0 = XVECEXP (*op0, 0, 0);
*code = new_code;
}
@@ -942,10 +956,11 @@ s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
const0_rtx);
}
-/* Emit a jump instruction to TARGET. If COND is NULL_RTX, emit an
- unconditional jump, else a conditional jump under condition COND. */
+/* Emit a jump instruction to TARGET and return it. If COND is
+ NULL_RTX, emit an unconditional jump, else a conditional jump under
+ condition COND. */
-void
+rtx
s390_emit_jump (rtx target, rtx cond)
{
rtx insn;
@@ -955,7 +970,7 @@ s390_emit_jump (rtx target, rtx cond)
target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
insn = gen_rtx_SET (VOIDmode, pc_rtx, target);
- emit_jump_insn (insn);
+ return emit_jump_insn (insn);
}
/* Return branch condition mask to implement a branch
@@ -971,7 +986,10 @@ s390_branch_condition_mask (rtx code)
gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
- gcc_assert (XEXP (code, 1) == const0_rtx);
+ gcc_assert (XEXP (code, 1) == const0_rtx
+ || (GET_MODE (XEXP (code, 0)) == CCRAWmode
+ && CONST_INT_P (XEXP (code, 1))));
+
switch (GET_MODE (XEXP (code, 0)))
{
@@ -1145,6 +1163,17 @@ s390_branch_condition_mask (rtx code)
}
break;
+ case CCRAWmode:
+ switch (GET_CODE (code))
+ {
+ case EQ:
+ return INTVAL (XEXP (code, 1));
+ case NE:
+ return (INTVAL (XEXP (code, 1))) ^ 0xf;
+ default:
+ gcc_unreachable ();
+ }
+
default:
return -1;
}
@@ -1204,7 +1233,9 @@ s390_branch_condition_mnemonic (rtx code, int inv)
if (GET_CODE (XEXP (code, 0)) == REG
&& REGNO (XEXP (code, 0)) == CC_REGNUM
- && XEXP (code, 1) == const0_rtx)
+ && (XEXP (code, 1) == const0_rtx
+ || (GET_MODE (XEXP (code, 0)) == CCRAWmode
+ && CONST_INT_P (XEXP (code, 1)))))
mask = s390_branch_condition_mask (code);
else
mask = s390_compare_and_branch_condition_mask (code);
@@ -1602,6 +1633,11 @@ s390_option_override (void)
if (!(target_flags_explicit & MASK_HARD_DFP) && TARGET_DFP)
target_flags |= MASK_HARD_DFP;
+ /* Enable hardware transactions if they are available and have not been
+ explicitly disabled by the user; this covers e.g. -m31 -march=zEC12 -mzarch. */
+ if (!(target_flags_explicit & MASK_OPT_HTM) && TARGET_CPU_HTM && TARGET_ZARCH)
+ target_flags |= MASK_OPT_HTM;
+
if (TARGET_HARD_DFP && !TARGET_DFP)
{
if (target_flags_explicit & MASK_HARD_DFP)
@@ -2017,14 +2053,18 @@ s390_decompose_address (rtx addr, struct s390_address *out)
Thus we don't check the displacement for validity here. If after
elimination the displacement turns out to be invalid after all,
this is fixed up by reload in any case. */
- if (base != arg_pointer_rtx
- && indx != arg_pointer_rtx
- && base != return_address_pointer_rtx
- && indx != return_address_pointer_rtx
- && base != frame_pointer_rtx
- && indx != frame_pointer_rtx
- && base != virtual_stack_vars_rtx
- && indx != virtual_stack_vars_rtx)
+ /* LRA always keeps displacements up to date, and we need to know
+ that the displacement is valid throughout LRA, not only at the
+ final elimination. */
+ if (lra_in_progress
+ || (base != arg_pointer_rtx
+ && indx != arg_pointer_rtx
+ && base != return_address_pointer_rtx
+ && indx != return_address_pointer_rtx
+ && base != frame_pointer_rtx
+ && indx != frame_pointer_rtx
+ && base != virtual_stack_vars_rtx
+ && indx != virtual_stack_vars_rtx))
if (!DISP_IN_RANGE (offset))
return false;
}
@@ -3189,7 +3229,9 @@ s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
/* We need a scratch register when loading a PLUS expression which
is not a legitimate operand of the LOAD ADDRESS instruction. */
- if (in_p && s390_plus_operand (x, mode))
+ /* LRA handles the transformation of a PLUS operand well on its own,
+ so we do not need to request a secondary reload here in that case. */
+ if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
sri->icode = (TARGET_64BIT ?
CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
@@ -7017,7 +7059,7 @@ s390_chunkify_start (void)
if (LABEL_P (insn)
&& (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
{
- rtx vec_insn = next_real_insn (insn);
+ rtx vec_insn = NEXT_INSN (insn);
if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
}
@@ -7027,6 +7069,8 @@ s390_chunkify_start (void)
else if (JUMP_P (insn))
{
rtx pat = PATTERN (insn);
+ rtx table;
+
if (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 2)
pat = XVECEXP (pat, 0, 0);
@@ -7040,28 +7084,18 @@ s390_chunkify_start (void)
bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
}
}
- else if (GET_CODE (pat) == PARALLEL
- && XVECLEN (pat, 0) == 2
- && GET_CODE (XVECEXP (pat, 0, 0)) == SET
- && GET_CODE (XVECEXP (pat, 0, 1)) == USE
- && GET_CODE (XEXP (XVECEXP (pat, 0, 1), 0)) == LABEL_REF)
- {
- /* Find the jump table used by this casesi jump. */
- rtx vec_label = XEXP (XEXP (XVECEXP (pat, 0, 1), 0), 0);
- rtx vec_insn = next_real_insn (vec_label);
- if (vec_insn && JUMP_TABLE_DATA_P (vec_insn))
- {
- rtx vec_pat = PATTERN (vec_insn);
- int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
-
- for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
- {
- rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
-
- if (s390_find_pool (pool_list, label)
- != s390_find_pool (pool_list, insn))
- bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
- }
+ else if (tablejump_p (insn, NULL, &table))
+ {
+ rtx vec_pat = PATTERN (table);
+ int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
+
+ for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
+ {
+ rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
+
+ if (s390_find_pool (pool_list, label)
+ != s390_find_pool (pool_list, insn))
+ bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
}
}
}
@@ -7336,11 +7370,11 @@ s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *d
if (GET_CODE (setreg) == SUBREG)
{
rtx inner = SUBREG_REG (setreg);
- if (!GENERAL_REG_P (inner))
+ if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
return;
regno = subreg_regno (setreg);
}
- else if (GENERAL_REG_P (setreg))
+ else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
regno = REGNO (setreg);
else
return;
@@ -7363,13 +7397,13 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered)
rtx cur_insn;
unsigned int i;
- memset (regs_ever_clobbered, 0, 16 * sizeof (int));
+ memset (regs_ever_clobbered, 0, 32 * sizeof (int));
/* For non-leaf functions we have to consider all call clobbered regs to be
clobbered. */
if (!crtl->is_leaf)
{
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 32; i++)
regs_ever_clobbered[i] = call_really_used_regs[i];
}
@@ -7391,7 +7425,7 @@ s390_regs_ever_clobbered (int *regs_ever_clobbered)
See expand_builtin_unwind_init. For regs_ever_live this is done by
reload. */
if (cfun->has_nonlocal_label)
- for (i = 0; i < 16; i++)
+ for (i = 0; i < 32; i++)
if (!call_really_used_regs[i])
regs_ever_clobbered[i] = 1;
@@ -7457,17 +7491,6 @@ s390_register_info (int clobbered_regs[])
{
int i, j;
- /* fprs 8 - 15 are call saved for 64 Bit ABI. */
- cfun_frame_layout.fpr_bitmap = 0;
- cfun_frame_layout.high_fprs = 0;
- if (TARGET_64BIT)
- for (i = 24; i < 32; i++)
- if (df_regs_ever_live_p (i) && !global_regs[i])
- {
- cfun_set_fpr_bit (i - 16);
- cfun_frame_layout.high_fprs++;
- }
-
/* Find first and last gpr to be saved. We trust regs_ever_live
data, except that we don't save and restore global registers.
@@ -7476,6 +7499,29 @@ s390_register_info (int clobbered_regs[])
s390_regs_ever_clobbered (clobbered_regs);
+ /* fprs 8 - 15 are call saved for 64 Bit ABI. */
+ if (!epilogue_completed)
+ {
+ cfun_frame_layout.fpr_bitmap = 0;
+ cfun_frame_layout.high_fprs = 0;
+ if (TARGET_64BIT)
+ for (i = 24; i < 32; i++)
+ /* During reload we have to use the df_regs_ever_live info,
+ since reload marks FPRs used as spill slots as live there
+ before actually making the code changes. Without this we
+ fail during elimination offset verification. */
+ if ((clobbered_regs[i]
+ || (df_regs_ever_live_p (i)
+ && (lra_in_progress
+ || reload_in_progress
+ || crtl->saves_all_registers)))
+ && !global_regs[i])
+ {
+ cfun_set_fpr_bit (i - 16);
+ cfun_frame_layout.high_fprs++;
+ }
+ }
+
for (i = 0; i < 16; i++)
clobbered_regs[i] = clobbered_regs[i] && !global_regs[i] && !fixed_regs[i];
@@ -7726,7 +7772,7 @@ s390_init_frame_layout (void)
{
HOST_WIDE_INT frame_size;
int base_used;
- int clobbered_regs[16];
+ int clobbered_regs[32];
/* On S/390 machines, we may need to perform branch splitting, which
will require both base and return address register. We have no
@@ -7761,6 +7807,157 @@ s390_init_frame_layout (void)
while (frame_size != cfun_frame_layout.frame_size);
}
+/* Remove the FPR clobbers from a tbegin insn if it can be proven that
+ the TX is nonescaping. A transaction is considered escaping if
+ there is at least one path from tbegin returning CC0 to the
+ function exit block without a tend.
+
+ The check so far has some limitations:
+ - only single tbegin/tend BBs are supported
+ - the first cond jump after tbegin must separate the CC0 path from ~CC0
+ - when CC is copied to a GPR and the CC0 check is done with the GPR
+ this is not supported
+*/
+
+static void
+s390_optimize_nonescaping_tx (void)
+{
+ const unsigned int CC0 = 1 << 3;
+ basic_block tbegin_bb = NULL;
+ basic_block tend_bb = NULL;
+ basic_block bb;
+ rtx insn;
+ bool result = true;
+ int bb_index;
+ rtx tbegin_insn = NULL_RTX;
+
+ if (!cfun->machine->tbegin_p)
+ return;
+
+ for (bb_index = 0; bb_index < n_basic_blocks; bb_index++)
+ {
+ bb = BASIC_BLOCK (bb_index);
+
+ FOR_BB_INSNS (bb, insn)
+ {
+ rtx ite, cc, pat, target;
+ unsigned HOST_WIDE_INT mask;
+
+ if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
+ continue;
+
+ pat = PATTERN (insn);
+
+ if (GET_CODE (pat) == PARALLEL)
+ pat = XVECEXP (pat, 0, 0);
+
+ if (GET_CODE (pat) != SET
+ || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
+ continue;
+
+ if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
+ {
+ rtx tmp;
+
+ tbegin_insn = insn;
+
+ /* Just return if the tbegin doesn't have clobbers. */
+ if (GET_CODE (PATTERN (insn)) != PARALLEL)
+ return;
+
+ if (tbegin_bb != NULL)
+ return;
+
+ /* Find the next conditional jump. */
+ for (tmp = NEXT_INSN (insn);
+ tmp != NULL_RTX;
+ tmp = NEXT_INSN (tmp))
+ {
+ if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
+ return;
+ if (!JUMP_P (tmp))
+ continue;
+
+ ite = SET_SRC (PATTERN (tmp));
+ if (GET_CODE (ite) != IF_THEN_ELSE)
+ continue;
+
+ cc = XEXP (XEXP (ite, 0), 0);
+ if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
+ || GET_MODE (cc) != CCRAWmode
+ || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
+ return;
+
+ if (bb->succs->length () != 2)
+ return;
+
+ mask = INTVAL (XEXP (XEXP (ite, 0), 1));
+ if (GET_CODE (XEXP (ite, 0)) == NE)
+ mask ^= 0xf;
+
+ if (mask == CC0)
+ target = XEXP (ite, 1);
+ else if (mask == (CC0 ^ 0xf))
+ target = XEXP (ite, 2);
+ else
+ return;
+
+ {
+ edge_iterator ei;
+ edge e1, e2;
+
+ ei = ei_start (bb->succs);
+ e1 = ei_safe_edge (ei);
+ ei_next (&ei);
+ e2 = ei_safe_edge (ei);
+
+ if (e2->flags & EDGE_FALLTHRU)
+ {
+ e2 = e1;
+ e1 = ei_safe_edge (ei);
+ }
+
+ if (!(e1->flags & EDGE_FALLTHRU))
+ return;
+
+ tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
+ }
+ if (tmp == BB_END (bb))
+ break;
+ }
+ }
+
+ if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
+ {
+ if (tend_bb != NULL)
+ return;
+ tend_bb = bb;
+ }
+ }
+ }
+
+ /* Either we successfully remove the FPR clobbers here or we are not
+ able to do anything for this TX. In either case there is no point
+ in looking at this TX again. */
+ cfun->machine->tbegin_p = false;
+
+ if (tbegin_bb == NULL || tend_bb == NULL)
+ return;
+
+ calculate_dominance_info (CDI_POST_DOMINATORS);
+ result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
+ free_dominance_info (CDI_POST_DOMINATORS);
+
+ if (!result)
+ return;
+
+ PATTERN (tbegin_insn) = XVECEXP (PATTERN (tbegin_insn), 0, 0);
+ INSN_CODE (tbegin_insn) = -1;
+ df_insn_rescan (tbegin_insn);
+
+ return;
+}
+
/* Update frame layout. Recompute actual register save data based on
current info and update regs_ever_live for the special registers.
May be called multiple times, but may never cause *more* registers
@@ -7769,7 +7966,7 @@ s390_init_frame_layout (void)
static void
s390_update_frame_layout (void)
{
- int clobbered_regs[16];
+ int clobbered_regs[32];
s390_register_info (clobbered_regs);
@@ -7868,6 +8065,13 @@ s390_class_max_nregs (enum reg_class rclass, enum machine_mode mode)
return (GET_MODE_SIZE (mode) + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
}
+/* Return true if we use LRA instead of the reload pass. */
+static bool
+s390_lra_p (void)
+{
+ return s390_lra_flag;
+}
+
/* Return true if register FROM can be eliminated via register TO. */
static bool
@@ -8199,8 +8403,10 @@ s390_emit_prologue (void)
int offset;
int next_fpr = 0;
- /* Complete frame layout. */
+ /* Try to get rid of the FPR clobbers. */
+ s390_optimize_nonescaping_tx ();
+ /* Complete frame layout. */
s390_update_frame_layout ();
/* Annotate all constant pool references to let the scheduler know
@@ -9348,6 +9554,294 @@ s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
return build_va_arg_indirect_ref (addr);
}
+/* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
+ expanders.
+ DEST - Register location where CC will be stored.
+ TDB - Pointer to a 256 byte area where the transaction diagnostic
+ block is to be stored. NULL if the TDB is not needed.
+ RETRY - Retry count value. If non-NULL a retry loop for CC2
+ is emitted.
+ CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
+ of the tbegin instruction pattern. */
+
+void
+s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
+{
+ const int CC0 = 1 << 3;
+ const int CC1 = 1 << 2;
+ const int CC3 = 1 << 0;
+ rtx abort_label = gen_label_rtx ();
+ rtx leave_label = gen_label_rtx ();
+ rtx retry_reg = gen_reg_rtx (SImode);
+ rtx retry_label = NULL_RTX;
+ rtx jump;
+ rtx very_unlikely = GEN_INT (REG_BR_PROB_BASE / 100 - 1);
+
+ if (retry != NULL_RTX)
+ {
+ emit_move_insn (retry_reg, retry);
+ retry_label = gen_label_rtx ();
+ emit_label (retry_label);
+ }
+
+ if (clobber_fprs_p)
+ emit_insn (gen_tbegin_1 (tdb,
+ gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
+ else
+ emit_insn (gen_tbegin_nofloat_1 (tdb,
+ gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK)));
+
+ jump = s390_emit_jump (abort_label,
+ gen_rtx_NE (VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM),
+ gen_rtx_CONST_INT (VOIDmode, CC0)));
+
+ JUMP_LABEL (jump) = abort_label;
+ LABEL_NUSES (abort_label) = 1;
+ add_reg_note (jump, REG_BR_PROB, very_unlikely);
+
+ /* Initialize CC return value. */
+ emit_move_insn (dest, const0_rtx);
+
+ s390_emit_jump (leave_label, NULL_RTX);
+ LABEL_NUSES (leave_label) = 1;
+ emit_barrier ();
+
+ /* Abort handler code. */
+
+ emit_label (abort_label);
+ if (retry != NULL_RTX)
+ {
+ rtx count = gen_reg_rtx (SImode);
+ jump = s390_emit_jump (leave_label,
+ gen_rtx_EQ (VOIDmode,
+ gen_rtx_REG (CCRAWmode, CC_REGNUM),
+ gen_rtx_CONST_INT (VOIDmode, CC1 | CC3)));
+ LABEL_NUSES (leave_label) = 2;
+ add_reg_note (jump, REG_BR_PROB, very_unlikely);
+
+ /* CC2 - transient failure. Perform retry with ppa. */
+ emit_move_insn (count, retry);
+ emit_insn (gen_subsi3 (count, count, retry_reg));
+ emit_insn (gen_tx_assist (count));
+ jump = emit_jump_insn (gen_doloop_si64 (retry_label,
+ retry_reg,
+ retry_reg));
+ JUMP_LABEL (jump) = retry_label;
+ LABEL_NUSES (retry_label) = 1;
+ }
+
+ emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
+ gen_rtvec (1, gen_rtx_REG (CCRAWmode,
+ CC_REGNUM)),
+ UNSPEC_CC_TO_INT));
+ emit_label (leave_label);
+}
+
+/* Builtins. */
+
+enum s390_builtin
+{
+ S390_BUILTIN_TBEGIN,
+ S390_BUILTIN_TBEGIN_NOFLOAT,
+ S390_BUILTIN_TBEGIN_RETRY,
+ S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+ S390_BUILTIN_TBEGINC,
+ S390_BUILTIN_TEND,
+ S390_BUILTIN_TABORT,
+ S390_BUILTIN_NON_TX_STORE,
+ S390_BUILTIN_TX_NESTING_DEPTH,
+ S390_BUILTIN_TX_ASSIST,
+
+ S390_BUILTIN_max
+};
+
+static enum insn_code const code_for_builtin[S390_BUILTIN_max] = {
+ CODE_FOR_tbegin,
+ CODE_FOR_tbegin_nofloat,
+ CODE_FOR_tbegin_retry,
+ CODE_FOR_tbegin_retry_nofloat,
+ CODE_FOR_tbeginc,
+ CODE_FOR_tend,
+ CODE_FOR_tabort,
+ CODE_FOR_ntstg,
+ CODE_FOR_etnd,
+ CODE_FOR_tx_assist
+};
+
+static void
+s390_init_builtins (void)
+{
+ tree ftype, uint64_type;
+
+ /* void foo (void) */
+ ftype = build_function_type_list (void_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbeginc", ftype, S390_BUILTIN_TBEGINC,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* void foo (int) */
+ ftype = build_function_type_list (void_type_node, integer_type_node,
+ NULL_TREE);
+ add_builtin_function ("__builtin_tabort", ftype,
+ S390_BUILTIN_TABORT, BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tx_assist", ftype,
+ S390_BUILTIN_TX_ASSIST, BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* int foo (void *) */
+ ftype = build_function_type_list (integer_type_node, ptr_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin", ftype, S390_BUILTIN_TBEGIN,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin_nofloat", ftype,
+ S390_BUILTIN_TBEGIN_NOFLOAT,
+ BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* int foo (void *, int) */
+ ftype = build_function_type_list (integer_type_node, ptr_type_node,
+ integer_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin_retry", ftype,
+ S390_BUILTIN_TBEGIN_RETRY,
+ BUILT_IN_MD,
+ NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tbegin_retry_nofloat", ftype,
+ S390_BUILTIN_TBEGIN_RETRY_NOFLOAT,
+ BUILT_IN_MD,
+ NULL, NULL_TREE);
+
+ /* int foo (void) */
+ ftype = build_function_type_list (integer_type_node, NULL_TREE);
+ add_builtin_function ("__builtin_tx_nesting_depth", ftype,
+ S390_BUILTIN_TX_NESTING_DEPTH,
+ BUILT_IN_MD, NULL, NULL_TREE);
+ add_builtin_function ("__builtin_tend", ftype,
+ S390_BUILTIN_TEND, BUILT_IN_MD, NULL, NULL_TREE);
+
+ /* void foo (uint64_t *, uint64_t) */
+ if (TARGET_64BIT)
+ uint64_type = long_unsigned_type_node;
+ else
+ uint64_type = long_long_unsigned_type_node;
+
+ ftype = build_function_type_list (void_type_node,
+ build_pointer_type (uint64_type),
+ uint64_type, NULL_TREE);
+ add_builtin_function ("__builtin_non_tx_store", ftype,
+ S390_BUILTIN_NON_TX_STORE,
+ BUILT_IN_MD, NULL, NULL_TREE);
+}
+
+/* Expand an expression EXP that calls a built-in function,
+ with result going to TARGET if that's convenient
+ (and in mode MODE if that's convenient).
+ SUBTARGET may be used as the target for computing one of EXP's operands.
+ IGNORE is nonzero if the value is to be ignored. */
+
+static rtx
+s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
+ enum machine_mode mode ATTRIBUTE_UNUSED,
+ int ignore ATTRIBUTE_UNUSED)
+{
+#define MAX_ARGS 2
+
+ tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
+ unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
+ enum insn_code icode;
+ rtx op[MAX_ARGS], pat;
+ int arity;
+ bool nonvoid;
+ tree arg;
+ call_expr_arg_iterator iter;
+
+ if (fcode >= S390_BUILTIN_max)
+ internal_error ("bad builtin fcode");
+ icode = code_for_builtin[fcode];
+ if (icode == 0)
+ internal_error ("bad builtin fcode");
+
+ if (!TARGET_ZEC12)
+ error ("Transactional execution builtins require zEC12 or later\n");
+
+ if (!TARGET_HTM && TARGET_ZEC12)
+ error ("Transactional execution builtins not enabled (-mtx)\n");
+
+ /* Set a flag in the machine specific cfun part in order to support
+ saving/restoring of FPRs. */
+ if (fcode == S390_BUILTIN_TBEGIN || fcode == S390_BUILTIN_TBEGIN_RETRY)
+ cfun->machine->tbegin_p = true;
+
+ nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
+
+ arity = 0;
+ FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
+ {
+ const struct insn_operand_data *insn_op;
+
+ if (arg == error_mark_node)
+ return NULL_RTX;
+ if (arity >= MAX_ARGS)
+ return NULL_RTX;
+
+ insn_op = &insn_data[icode].operand[arity + nonvoid];
+
+ op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
+
+ if (!(*insn_op->predicate) (op[arity], insn_op->mode))
+ {
+ if (insn_op->predicate == memory_operand)
+ {
+ /* Don't move a NULL pointer into a register. Otherwise
+ we have to rely on combine being able to move it back
+ in order to get an immediate 0 in the instruction. */
+ if (op[arity] != const0_rtx)
+ op[arity] = copy_to_mode_reg (Pmode, op[arity]);
+ op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
+ }
+ else
+ op[arity] = copy_to_mode_reg (insn_op->mode, op[arity]);
+ }
+
+ arity++;
+ }
+
+ if (nonvoid)
+ {
+ enum machine_mode tmode = insn_data[icode].operand[0].mode;
+ if (!target
+ || GET_MODE (target) != tmode
+ || !(*insn_data[icode].operand[0].predicate) (target, tmode))
+ target = gen_reg_rtx (tmode);
+ }
+
+ switch (arity)
+ {
+ case 0:
+ pat = GEN_FCN (icode) (target);
+ break;
+ case 1:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0]);
+ else
+ pat = GEN_FCN (icode) (op[0]);
+ break;
+ case 2:
+ if (nonvoid)
+ pat = GEN_FCN (icode) (target, op[0], op[1]);
+ else
+ pat = GEN_FCN (icode) (op[0], op[1]);
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ if (!pat)
+ return NULL_RTX;
+ emit_insn (pat);
+
+ if (nonvoid)
+ return target;
+ else
+ return const0_rtx;
+}
+
+
/* Output assembly code for the trampoline template to
stdio stream FILE.
@@ -11003,6 +11497,11 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
#undef TARGET_RETURN_IN_MEMORY
#define TARGET_RETURN_IN_MEMORY s390_return_in_memory
+#undef TARGET_INIT_BUILTINS
+#define TARGET_INIT_BUILTINS s390_init_builtins
+#undef TARGET_EXPAND_BUILTIN
+#define TARGET_EXPAND_BUILTIN s390_expand_builtin
+
#undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
#define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
@@ -11105,6 +11604,9 @@ s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
#undef TARGET_LEGITIMATE_CONSTANT_P
#define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
+#undef TARGET_LRA_P
+#define TARGET_LRA_P s390_lra_p
+
#undef TARGET_CAN_ELIMINATE
#define TARGET_CAN_ELIMINATE s390_can_eliminate
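The new builtins can be used directly from C; a brief sketch with the retry variant, for which s390_expand_tbegin emits the CC2 retry loop with the ppa assist instead of the caller writing it by hand (it assumes a zEC12 target with the transactional-execution facility enabled; the option is referred to as -mtx in the diagnostics above):

static int add_in_tx (int *p, int val)
{
  /* Retry up to 5 times on a transient abort; the retry loop is
     generated by the tbegin_retry expander.  */
  if (__builtin_tbegin_retry ((void *) 0, 5) != 0)
    return -1;                  /* aborted: caller must use a fallback */

  *p += val;                    /* transactional store */
  return __builtin_tend ();
}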
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 43e24d5d112..d53fed7a6f2 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -34,7 +34,8 @@ enum processor_flags
PF_DFP = 16,
PF_Z10 = 32,
PF_Z196 = 64,
- PF_ZEC12 = 128
+ PF_ZEC12 = 128,
+ PF_TX = 256
};
/* This is necessary to avoid a warning about comparing different enum
@@ -61,6 +62,8 @@ enum processor_flags
(s390_arch_flags & PF_Z196)
#define TARGET_CPU_ZEC12 \
(s390_arch_flags & PF_ZEC12)
+#define TARGET_CPU_HTM \
+ (s390_arch_flags & PF_TX)
/* These flags indicate that the generated code should run on a cpu
providing the respective hardware facility when run in
@@ -78,6 +81,8 @@ enum processor_flags
(TARGET_ZARCH && TARGET_CPU_Z196)
#define TARGET_ZEC12 \
(TARGET_ZARCH && TARGET_CPU_ZEC12)
+#define TARGET_HTM \
+ (TARGET_ZARCH && TARGET_CPU_HTM && TARGET_OPT_HTM)
#define TARGET_AVOID_CMP_AND_BRANCH (s390_tune == PROCESSOR_2817_Z196)
@@ -93,23 +98,25 @@ enum processor_flags
#define TARGET_TPF 0
/* Target CPU builtins. */
-#define TARGET_CPU_CPP_BUILTINS() \
- do \
- { \
- builtin_assert ("cpu=s390"); \
- builtin_assert ("machine=s390"); \
- builtin_define ("__s390__"); \
- if (TARGET_ZARCH) \
- builtin_define ("__zarch__"); \
- if (TARGET_64BIT) \
- builtin_define ("__s390x__"); \
- if (TARGET_LONG_DOUBLE_128) \
- builtin_define ("__LONG_DOUBLE_128__"); \
- } \
+#define TARGET_CPU_CPP_BUILTINS() \
+ do \
+ { \
+ builtin_assert ("cpu=s390"); \
+ builtin_assert ("machine=s390"); \
+ builtin_define ("__s390__"); \
+ if (TARGET_ZARCH) \
+ builtin_define ("__zarch__"); \
+ if (TARGET_64BIT) \
+ builtin_define ("__s390x__"); \
+ if (TARGET_LONG_DOUBLE_128) \
+ builtin_define ("__LONG_DOUBLE_128__"); \
+ if (TARGET_HTM) \
+ builtin_define ("__HTM__"); \
+ } \
while (0)
#ifdef DEFAULT_TARGET_64BIT
-#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP)
+#define TARGET_DEFAULT (MASK_64BIT | MASK_ZARCH | MASK_HARD_DFP | MASK_OPT_HTM)
#else
#define TARGET_DEFAULT 0
#endif
@@ -221,7 +228,7 @@ enum processor_flags
/* Alignment on even addresses for LARL instruction. */
#define CONSTANT_ALIGNMENT(EXP, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
-#define DATA_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
+#define DATA_ABI_ALIGNMENT(TYPE, ALIGN) (ALIGN) < 16 ? 16 : (ALIGN)
/* Alignment is not required by the hardware. */
#define STRICT_ALIGNMENT 0
diff --git a/gcc/config/s390/s390.md b/gcc/config/s390/s390.md
index cad4f5f579a..e12d1538a50 100644
--- a/gcc/config/s390/s390.md
+++ b/gcc/config/s390/s390.md
@@ -59,11 +59,17 @@
(define_c_enum "unspec" [
; Miscellaneous
UNSPEC_ROUND
- UNSPEC_CCU_TO_INT
- UNSPEC_CCZ_TO_INT
UNSPEC_ICM
UNSPEC_TIE
+ ; Convert CC into a str comparison result and copy it into an
+ ; integer register
+ ; cc0->0, cc1->1, cc2->-1, (cc3->-1)
+ UNSPEC_STRCMPCC_TO_INT
+
+ ; Copy CC as is into the lower 2 bits of an integer register
+ UNSPEC_CC_TO_INT
+
; GOT/PLT and lt-relative accesses
UNSPEC_LTREL_OFFSET
UNSPEC_LTREL_BASE
@@ -138,6 +144,15 @@
; Atomic Support
UNSPECV_CAS
UNSPECV_ATOMIC_OP
+
+ ; Transactional Execution support
+ UNSPECV_TBEGIN
+ UNSPECV_TBEGINC
+ UNSPECV_TEND
+ UNSPECV_TABORT
+ UNSPECV_ETND
+ UNSPECV_NTSTG
+ UNSPECV_PPA
])
;;
@@ -191,6 +206,9 @@
(PFPO_OP1_TYPE_SHIFT 8)
])
+; Immediate operands for tbegin and tbeginc
+(define_constants [(TBEGIN_MASK 65292)]) ; 0xff0c
+(define_constants [(TBEGINC_MASK 65288)]) ; 0xff08
;; Instruction operand type as used in the Principles of Operation.
;; Used to determine defaults for length and other attribute values.
@@ -277,7 +295,8 @@
(define_attr "cpu" "g5,g6,z900,z990,z9_109,z9_ec,z10,z196,zEC12"
(const (symbol_ref "s390_tune_attr")))
-(define_attr "cpu_facility" "standard,ieee,zarch,longdisp,extimm,dfp,z10,z196,zEC12"
+(define_attr "cpu_facility"
+ "standard,ieee,zarch,cpu_zarch,longdisp,extimm,dfp,z10,z196,zEC12"
(const_string "standard"))
(define_attr "enabled" ""
@@ -304,6 +323,10 @@
(match_test "TARGET_DFP"))
(const_int 1)
+ (and (eq_attr "cpu_facility" "cpu_zarch")
+ (match_test "TARGET_CPU_ZARCH"))
+ (const_int 1)
+
(and (eq_attr "cpu_facility" "z10")
(match_test "TARGET_Z10"))
(const_int 1)
@@ -2246,7 +2269,7 @@
(define_insn "movcc"
[(set (match_operand:CC 0 "nonimmediate_operand" "=d,c,d,d,d,R,T")
- (match_operand:CC 1 "nonimmediate_operand" "d,d,c,R,T,d,d"))]
+ (match_operand:CC 1 "nonimmediate_operand" " d,d,c,R,T,d,d"))]
""
"@
lr\t%0,%1
@@ -2578,7 +2601,7 @@
(use (reg:SI 0))])
(parallel
[(set (match_operand:SI 0 "register_operand" "=d")
- (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_CCU_TO_INT))
+ (unspec:SI [(reg:CCU CC_REGNUM)] UNSPEC_STRCMPCC_TO_INT))
(clobber (reg:CC CC_REGNUM))])]
""
{
@@ -2690,7 +2713,7 @@
"(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -2820,7 +2843,7 @@
(match_dup 2)]
UNSPEC_TDC_INSN))
(set (match_operand:SI 0 "register_operand" "=d")
- (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))]
+ (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))]
"TARGET_HARD_FLOAT"
{
operands[2] = GEN_INT (S390_TDC_SIGNBIT_SET);
@@ -2832,12 +2855,21 @@
(match_dup 2)]
UNSPEC_TDC_INSN))
(set (match_operand:SI 0 "register_operand" "=d")
- (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CCZ_TO_INT))]
+ (unspec:SI [(reg:CCZ CC_REGNUM)] UNSPEC_CC_TO_INT))]
"TARGET_HARD_FLOAT"
{
operands[2] = GEN_INT (S390_TDC_INFINITY);
})
+(define_insn_and_split "*cc_to_int"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(match_operand 1 "register_operand" "0")]
+ UNSPEC_CC_TO_INT))]
+ "operands != NULL"
+ "#"
+ "reload_completed"
+ [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))])
+
; This insn is used to generate all variants of the Test Data Class
; instruction, namely tcxb, tcdb, and tceb. The insn's first operand
; is the register to be tested and the second one is the bit mask
@@ -2853,14 +2885,6 @@
[(set_attr "op_type" "RXE")
(set_attr "type" "fsimp<mode>")])
-(define_insn_and_split "*ccz_to_int"
- [(set (match_operand:SI 0 "register_operand" "=d")
- (unspec:SI [(match_operand:CCZ 1 "register_operand" "0")]
- UNSPEC_CCZ_TO_INT))]
- ""
- "#"
- "reload_completed"
- [(set (match_dup 0) (lshiftrt:SI (match_dup 0) (const_int 28)))])
;
@@ -2899,7 +2923,7 @@
"(GET_MODE (operands[1]) == Pmode || GET_MODE (operands[1]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (match_operand:BLK 0 "memory_operand" "")
@@ -3075,7 +3099,7 @@
"(GET_MODE (operands[2]) == Pmode || GET_MODE (operands[2]) == VOIDmode)"
"#"
[(set_attr "type" "cs")
- (set_attr "cpu_facility" "*,*,z10,*")])
+ (set_attr "cpu_facility" "*,*,z10,cpu_zarch")])
(define_split
[(set (reg:CCU CC_REGNUM)
@@ -3205,7 +3229,7 @@
(define_insn_and_split "cmpint"
[(set (match_operand:SI 0 "register_operand" "=d")
(unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
- UNSPEC_CCU_TO_INT))
+ UNSPEC_STRCMPCC_TO_INT))
(clobber (reg:CC CC_REGNUM))]
""
"#"
@@ -3218,10 +3242,10 @@
(define_insn_and_split "*cmpint_cc"
[(set (reg CC_REGNUM)
(compare (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
- UNSPEC_CCU_TO_INT)
+ UNSPEC_STRCMPCC_TO_INT)
(const_int 0)))
(set (match_operand:SI 0 "register_operand" "=d")
- (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT))]
+ (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT))]
"s390_match_ccmode (insn, CCSmode)"
"#"
"&& reload_completed"
@@ -3238,7 +3262,7 @@
(define_insn_and_split "*cmpint_sign"
[(set (match_operand:DI 0 "register_operand" "=d")
(sign_extend:DI (unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
- UNSPEC_CCU_TO_INT)))
+ UNSPEC_STRCMPCC_TO_INT)))
(clobber (reg:CC CC_REGNUM))]
"TARGET_ZARCH"
"#"
@@ -3252,11 +3276,11 @@
[(set (reg CC_REGNUM)
(compare (ashiftrt:DI (ashift:DI (subreg:DI
(unspec:SI [(match_operand:CCU 1 "register_operand" "0")]
- UNSPEC_CCU_TO_INT) 0)
+ UNSPEC_STRCMPCC_TO_INT) 0)
(const_int 32)) (const_int 32))
(const_int 0)))
(set (match_operand:DI 0 "register_operand" "=d")
- (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_CCU_TO_INT)))]
+ (sign_extend:DI (unspec:SI [(match_dup 1)] UNSPEC_STRCMPCC_TO_INT)))]
"s390_match_ccmode (insn, CCSmode) && TARGET_ZARCH"
"#"
"&& reload_completed"
@@ -5507,7 +5531,7 @@
(if_then_else:GPR
(match_operator 1 "s390_comparison"
[(match_operand 2 "cc_reg_operand" " c,c, c, c, c, c, c")
- (const_int 0)])
+ (match_operand 5 "const_int_operand" "")])
(match_operand:GPR 3 "nonimmediate_operand" " d,0,QS, 0, d, 0,QS")
(match_operand:GPR 4 "nonimmediate_operand" " 0,d, 0,QS, 0, d,QS")))]
"TARGET_Z196"
@@ -7907,7 +7931,8 @@
(define_insn "*cjump_64"
[(set (pc)
(if_then_else
- (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM)
+ (match_operand 2 "const_int_operand" "")])
(label_ref (match_operand 0 "" ""))
(pc)))]
"TARGET_CPU_ZARCH"
@@ -7926,7 +7951,8 @@
(define_insn "*cjump_31"
[(set (pc)
(if_then_else
- (match_operator 1 "s390_comparison" [(reg CC_REGNUM) (const_int 0)])
+ (match_operator 1 "s390_comparison" [(reg CC_REGNUM)
+ (match_operand 2 "const_int_operand" "")])
(label_ref (match_operand 0 "" ""))
(pc)))]
"!TARGET_CPU_ZARCH"
@@ -9795,3 +9821,217 @@
"cpsdr\t%0,%2,%1"
[(set_attr "op_type" "RRF")
(set_attr "type" "fsimp<mode>")])
+
+
+;;
+;;- Transactional execution instructions
+;;
+
+; This splitter helps combine to make use of CC directly when
+; comparing the integer result of a tbegin builtin with a constant.
+; The unspec is already removed by canonicalize_comparison. So this
+; splitter's only job is to turn the PARALLEL into separate insns
+; again. Unfortunately this only works with the very first cc/int
+; compare since combine is not able to deal with data flow across
+; basic block boundaries.
+
+; It needs to be an insn pattern as well since combine does not apply
+; the splitter directly. Combine only uses it if it would actually
+; reduce the number of instructions.
+(define_insn_and_split "*ccraw_to_int"
+ [(set (pc)
+ (if_then_else
+ (match_operator 0 "s390_eqne_operator"
+ [(reg:CCRAW CC_REGNUM)
+ (match_operand 1 "const_int_operand" "")])
+ (label_ref (match_operand 2 "" ""))
+ (pc)))
+ (set (match_operand:SI 3 "register_operand" "=d")
+ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
+ ""
+ "#"
+ ""
+ [(set (match_dup 3)
+ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))
+ (set (pc)
+ (if_then_else (match_op_dup 0 [(reg:CCRAW CC_REGNUM) (match_dup 1)])
+ (label_ref (match_dup 2))
+ (pc)))]
+ "")
+
+; Non-constrained transaction begin
+
+(define_expand "tbegin"
+ [(match_operand:SI 0 "register_operand" "=d")
+ (match_operand:BLK 1 "memory_operand" "=Q")]
+ "TARGET_HTM"
+{
+ s390_expand_tbegin (operands[0], operands[1], NULL_RTX, true);
+ DONE;
+})
+
+(define_expand "tbegin_nofloat"
+ [(match_operand:SI 0 "register_operand" "=d")
+ (match_operand:BLK 1 "memory_operand" "=Q")]
+ "TARGET_HTM"
+{
+ s390_expand_tbegin (operands[0], operands[1], NULL_RTX, false);
+ DONE;
+})
+
+(define_expand "tbegin_retry"
+ [(match_operand:SI 0 "register_operand" "=d")
+ (match_operand:BLK 1 "memory_operand" "=Q")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_HTM"
+{
+ s390_expand_tbegin (operands[0], operands[1], operands[2], true);
+ DONE;
+})
+
+(define_expand "tbegin_retry_nofloat"
+ [(match_operand:SI 0 "register_operand" "=d")
+ (match_operand:BLK 1 "memory_operand" "=Q")
+ (match_operand 2 "const_int_operand")]
+ "TARGET_HTM"
+{
+ s390_expand_tbegin (operands[0], operands[1], operands[2], false);
+ DONE;
+})
+
+(define_insn "tbegin_1"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q")
+ (match_operand 1 "const_int_operand" " D")]
+ UNSPECV_TBEGIN))
+ (clobber (reg:DF 16))
+ (clobber (reg:DF 17))
+ (clobber (reg:DF 18))
+ (clobber (reg:DF 19))
+ (clobber (reg:DF 20))
+ (clobber (reg:DF 21))
+ (clobber (reg:DF 22))
+ (clobber (reg:DF 23))
+ (clobber (reg:DF 24))
+ (clobber (reg:DF 25))
+ (clobber (reg:DF 26))
+ (clobber (reg:DF 27))
+ (clobber (reg:DF 28))
+ (clobber (reg:DF 29))
+ (clobber (reg:DF 30))
+ (clobber (reg:DF 31))]
+; CONST_OK_FOR_CONSTRAINT_P does not work with the D constraint, since D is
+; not supposed to be used for immediates (see genpreds.c).
+ "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff"
+ "tbegin\t%0,%x1"
+ [(set_attr "op_type" "SIL")])
+
+; Same as above but without the FPR clobbers
+(define_insn "tbegin_nofloat_1"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(match_operand:BLK 0 "memory_operand" "=Q")
+ (match_operand 1 "const_int_operand" " D")]
+ UNSPECV_TBEGIN))]
+ "TARGET_HTM && INTVAL (operands[1]) >= 0 && INTVAL (operands[1]) <= 0xffff"
+ "tbegin\t%0,%x1"
+ [(set_attr "op_type" "SIL")])
+
+
+; Constrained transaction begin
+
+(define_expand "tbeginc"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(const_int TBEGINC_MASK)]
+ UNSPECV_TBEGINC))]
+ "TARGET_HTM"
+ "")
+
+(define_insn "*tbeginc_1"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(match_operand 0 "const_int_operand" " D")]
+ UNSPECV_TBEGINC))]
+ "TARGET_HTM && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 0xffff"
+ "tbeginc\t0,%x0"
+ [(set_attr "op_type" "SIL")])
+
+; Transaction end
+
+(define_expand "tend"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND))
+ (set (match_operand:SI 0 "register_operand" "=d")
+ (unspec:SI [(reg:CCRAW CC_REGNUM)] UNSPEC_CC_TO_INT))]
+ "TARGET_HTM"
+ "")
+
+(define_insn "*tend_1"
+ [(set (reg:CCRAW CC_REGNUM)
+ (unspec_volatile:CCRAW [(const_int 0)] UNSPECV_TEND))]
+ "TARGET_HTM"
+ "tend"
+ [(set_attr "op_type" "S")])
+
+; Transaction abort
+
+(define_expand "tabort"
+ [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")]
+ UNSPECV_TABORT)]
+ "TARGET_HTM && operands != NULL"
+{
+ if (CONST_INT_P (operands[0])
+ && INTVAL (operands[0]) >= 0 && INTVAL (operands[0]) <= 255)
+ {
+ error ("Invalid transaction abort code: " HOST_WIDE_INT_PRINT_DEC
+ ". Values in range 0 through 255 are reserved.",
+ INTVAL (operands[0]));
+ FAIL;
+ }
+})
+
+(define_insn "*tabort_1"
+ [(unspec_volatile [(match_operand 0 "shift_count_or_setmem_operand" "")]
+ UNSPECV_TABORT)]
+ "TARGET_HTM && operands != NULL"
+ "tabort\t%Y0"
+ [(set_attr "op_type" "S")])
+
+; Transaction extract nesting depth
+
+(define_insn "etnd"
+ [(set (match_operand:SI 0 "register_operand" "=d")
+ (unspec_volatile:SI [(const_int 0)] UNSPECV_ETND))]
+ "TARGET_HTM"
+ "etnd\t%0"
+ [(set_attr "op_type" "RRE")])
+
+; Non-transactional store
+
+(define_insn "ntstg"
+ [(set (match_operand:DI 0 "memory_operand" "=RT")
+ (unspec_volatile:DI [(match_operand:DI 1 "register_operand" "d")]
+ UNSPECV_NTSTG))]
+ "TARGET_HTM"
+ "ntstg\t%1,%0"
+ [(set_attr "op_type" "RXY")])
+
+; Transaction perform processor assist
+
+(define_expand "tx_assist"
+ [(set (match_dup 1) (const_int 0))
+ (unspec_volatile [(match_operand:SI 0 "register_operand" "d")
+ (match_dup 1)
+ (const_int 1)]
+ UNSPECV_PPA)]
+ "TARGET_HTM"
+{
+ operands[1] = gen_reg_rtx (SImode);
+})
+
+(define_insn "*ppa"
+ [(unspec_volatile [(match_operand:SI 0 "register_operand" "d")
+ (match_operand:SI 1 "register_operand" "d")
+ (match_operand 2 "const_int_operand" "I")]
+ UNSPECV_PPA)]
+ "TARGET_HTM && INTVAL (operands[2]) < 16"
+ "ppa\t%0,%1,1"
+ [(set_attr "op_type" "RRF")])
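For orientation, a minimal user-level sketch (not part of the patch) of how the tbegin/tend expanders above are typically reached -- assuming the __builtin_tbegin and __builtin_tend builtins that the rest of this patch wires up through s390_expand_tbegin, and compilation with -mhtm:

/* Illustrative sketch only: a condition code of 0 from tbegin means
   the transaction has started; tend commits it.  */
#include <stdint.h>

static int add_transactionally (int64_t *counter)
{
  if (__builtin_tbegin ((void *) 0) == 0)
    {
      ++*counter;              /* transactional body */
      __builtin_tend ();       /* commit */
      return 0;
    }
  return -1;                   /* transaction aborted or not started */
}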
diff --git a/gcc/config/s390/s390.opt b/gcc/config/s390/s390.opt
index b326441173c..7dedb836701 100644
--- a/gcc/config/s390/s390.opt
+++ b/gcc/config/s390/s390.opt
@@ -104,6 +104,10 @@ mlong-double-64
Target Report RejectNegative Negative(mlong-double-128) InverseMask(LONG_DOUBLE_128)
Use 64-bit long double
+mhtm
+Target Report Mask(OPT_HTM)
+Use hardware transactional execution instructions
+
mpacked-stack
Target Report Mask(PACKED_STACK)
Use packed stack layout
@@ -149,3 +153,7 @@ Target Report Joined RejectNegative UInteger Var(s390_branch_cost) Init(1)
Set the branch costs for conditional branch instructions. Reasonable
values are small, non-negative integers. The default branch cost is
1.
+
+mlra
+Target Report Var(s390_lra_flag) Init(1) Save
+Use LRA instead of reload
diff --git a/gcc/config/s390/s390intrin.h b/gcc/config/s390/s390intrin.h
new file mode 100644
index 00000000000..e1a00ce58e3
--- /dev/null
+++ b/gcc/config/s390/s390intrin.h
@@ -0,0 +1,33 @@
+/* S/390 System z specific intrinsics
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ Contributed by Andreas Krebbel (Andreas.Krebbel@de.ibm.com)
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 3, or (at your option) any later
+version.
+
+GCC is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#ifndef _S390INTRIN_H
+#define _S390INTRIN_H
+
+#ifndef __s390__
+ #error s390intrin.h included on wrong platform/compiler
+#endif
+
+#ifdef __HTM__
+#include <htmintrin.h>
+#endif
+
+
+#endif /* _S390INTRIN_H */
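As a quick, hypothetical illustration (not from the patch): user code is expected to include only this umbrella header; with -mhtm in effect the compiler predefines __HTM__, so <htmintrin.h> is pulled in automatically.

/* Hypothetical translation unit, assuming -mhtm on the command line.  */
#include <s390intrin.h>

#ifndef __HTM__
#error "transactional execution support was not enabled"
#endif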
diff --git a/gcc/config/sh/predicates.md b/gcc/config/sh/predicates.md
index 25949c62d23..998ba7300ad 100644
--- a/gcc/config/sh/predicates.md
+++ b/gcc/config/sh/predicates.md
@@ -398,9 +398,13 @@
(define_predicate "general_extend_operand"
(match_code "subreg,reg,mem,truncate")
{
- return (GET_CODE (op) == TRUNCATE
- ? arith_operand
- : nonimmediate_operand) (op, mode);
+ if (GET_CODE (op) == TRUNCATE)
+ return arith_operand (op, mode);
+
+ if (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op))))
+ return general_movsrc_operand (op, mode);
+
+ return nonimmediate_operand (op, mode);
})
;; Returns 1 if OP is a simple register address.
@@ -468,17 +472,36 @@
return 0;
}
- if ((mode == QImode || mode == HImode)
- && mode == GET_MODE (op)
- && (MEM_P (op)
- || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))))
+ if (mode == GET_MODE (op)
+ && (MEM_P (op) || (GET_CODE (op) == SUBREG && MEM_P (SUBREG_REG (op)))))
{
- rtx x = XEXP ((MEM_P (op) ? op : SUBREG_REG (op)), 0);
+ rtx mem_rtx = MEM_P (op) ? op : SUBREG_REG (op);
+ rtx x = XEXP (mem_rtx, 0);
- if (GET_CODE (x) == PLUS
+ if ((mode == QImode || mode == HImode)
+ && GET_CODE (x) == PLUS
&& REG_P (XEXP (x, 0))
&& CONST_INT_P (XEXP (x, 1)))
return sh_legitimate_index_p (mode, XEXP (x, 1), TARGET_SH2A, false);
+
+ /* Allow reg+reg addressing here without validating the register
+ numbers. Usually one of the regs must be R0 or a pseudo reg.
+ In some cases it can happen that arguments from hard regs are
+	 propagated directly into address expressions.  In these cases reload
+ will have to fix it up later. However, allow this only for native
+ 1, 2 or 4 byte addresses. */
+ if (can_create_pseudo_p () && GET_CODE (x) == PLUS
+ && GET_MODE_SIZE (mode) <= 4
+ && REG_P (XEXP (x, 0)) && REG_P (XEXP (x, 1)))
+ return true;
+
+ /* 'general_operand' does not allow volatile mems during RTL expansion to
+ avoid matching arithmetic that operates on mems, it seems.
+ On SH this leads to redundant sign extensions for QImode or HImode
+ loads. Thus we mimic the behavior but allow volatile mems. */
+ if (memory_address_addr_space_p (GET_MODE (mem_rtx), x,
+ MEM_ADDR_SPACE (mem_rtx)))
+ return true;
}
if (TARGET_SHMEDIA
@@ -489,6 +512,7 @@
&& GET_CODE (op) == SUBREG && GET_MODE (op) == mode
&& SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
/* FIXME */ abort (); /* return 1; */
+
return general_operand (op, mode);
})
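A hedged illustration (not part of the patch) of the access pattern the relaxed predicate is aimed at -- per the comment above, rejecting volatile mems led to redundant sign extensions for QImode/HImode loads on SH:

/* Illustrative only: a volatile byte load that ideally becomes a single
   sign-extending mov.b rather than a load plus a separate extension.  */
int read_status (volatile signed char *port)
{
  return *port;
}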
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 5976206f8b4..60f45452036 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -19,12 +19,6 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-/* FIXME: This is a temporary hack, so that we can include <algorithm>
- below. <algorithm> will try to include <cstdlib> which will reference
- malloc & co, which are poisoned by "system.h". The proper solution is
- to include <cstdlib> in "system.h" instead of <stdlib.h>. */
-#include <cstdlib>
-
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/sh/sh.md b/gcc/config/sh/sh.md
index 71ad1c1a2f6..8a140687654 100644
--- a/gcc/config/sh/sh.md
+++ b/gcc/config/sh/sh.md
@@ -12251,10 +12251,10 @@ label:
;; FMA (fused multiply-add) patterns
(define_expand "fmasf4"
- [(set (match_operand:SF 0 "fp_arith_reg_operand" "")
- (fma:SF (match_operand:SF 1 "fp_arith_reg_operand" "")
- (match_operand:SF 2 "fp_arith_reg_operand" "")
- (match_operand:SF 3 "fp_arith_reg_operand" "")))]
+ [(set (match_operand:SF 0 "fp_arith_reg_operand")
+ (fma:SF (match_operand:SF 1 "fp_arith_reg_operand")
+ (match_operand:SF 2 "fp_arith_reg_operand")
+ (match_operand:SF 3 "fp_arith_reg_operand")))]
"TARGET_SH2E || TARGET_SHMEDIA_FPU"
{
if (TARGET_SH2E)
@@ -12285,6 +12285,43 @@ label:
"fmac.s %1, %2, %0"
[(set_attr "type" "fparith_media")])
+;; In some cases, such as 'a * b + a', the FMA pattern is not generated by
+;; previous transformations. If FMA is generally allowed, let the combine
+;; pass utilize it.
+(define_insn_and_split "*fmasf4"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%w")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "arith_reg_operand" "0")))
+ (use (match_operand:PSI 4 "fpscr_operand"))]
+ "TARGET_SH2E && flag_fp_contract_mode != FP_CONTRACT_OFF"
+ "fmac %1,%2,%0"
+ "&& can_create_pseudo_p ()"
+ [(parallel [(set (match_dup 0)
+ (fma:SF (match_dup 1) (match_dup 2) (match_dup 3)))
+ (use (match_dup 4))])]
+{
+ /* Change 'b * a + a' into 'a * b + a'.
+ This is better for register allocation. */
+ if (REGNO (operands[2]) == REGNO (operands[3]))
+ {
+ rtx tmp = operands[1];
+ operands[1] = operands[2];
+ operands[2] = tmp;
+ }
+}
+ [(set_attr "type" "fp")
+ (set_attr "fp_mode" "single")])
+
+(define_insn "*fmasf4_media"
+ [(set (match_operand:SF 0 "fp_arith_reg_operand" "=f")
+ (plus:SF (mult:SF (match_operand:SF 1 "fp_arith_reg_operand" "%f")
+ (match_operand:SF 2 "fp_arith_reg_operand" "f"))
+ (match_operand:SF 3 "fp_arith_reg_operand" "0")))]
+ "TARGET_SHMEDIA_FPU && flag_fp_contract_mode != FP_CONTRACT_OFF"
+ "fmac.s %1, %2, %0"
+ [(set_attr "type" "fparith_media")])
+
(define_expand "divsf3"
[(set (match_operand:SF 0 "arith_reg_operand" "")
(div:SF (match_operand:SF 1 "arith_reg_operand" "")
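The new *fmasf4 combine pattern above targets shapes like 'a * b + a'. As a hedged illustration (not from the patch), source of the following form is expected to contract to a single fmac on SH2E when FP contraction is allowed (e.g. -ffp-contract=fast):

/* Illustrative only: there is no separate addend temporary here, so the
   generic FMA expansion misses it; the combine pattern is meant to
   catch it.  */
float madd_in_place (float a, float b)
{
  return a * b + a;
}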
diff --git a/gcc/config/sol2.c b/gcc/config/sol2.c
index 718134e2b90..7c7c429db3d 100644
--- a/gcc/config/sol2.c
+++ b/gcc/config/sol2.c
@@ -29,7 +29,7 @@ along with GCC; see the file COPYING3. If not see
#include "tm_p.h"
#include "diagnostic-core.h"
#include "ggc.h"
-#include "hashtab.h"
+#include "hash-table.h"
tree solaris_pending_aligns, solaris_pending_inits, solaris_pending_finis;
@@ -157,10 +157,6 @@ solaris_assemble_visibility (tree decl, int vis ATTRIBUTE_UNUSED)
#endif
}
-/* Hash table of group signature symbols. */
-
-static htab_t solaris_comdat_htab;
-
/* Group section information entry stored in solaris_comdat_htab. */
typedef struct comdat_entry
@@ -171,25 +167,34 @@ typedef struct comdat_entry
const char *sig;
} comdat_entry;
-/* Helper routines for maintaining solaris_comdat_htab. */
+/* Helpers for maintaining solaris_comdat_htab. */
-static hashval_t
-comdat_hash (const void *p)
+struct comdat_entry_hasher : typed_noop_remove <comdat_entry>
+{
+ typedef comdat_entry value_type;
+ typedef comdat_entry compare_type;
+ static inline hashval_t hash (const value_type *);
+ static inline bool equal (const value_type *, const compare_type *);
+ static inline void remove (value_type *);
+};
+
+inline hashval_t
+comdat_entry_hasher::hash (const value_type *entry)
{
- const comdat_entry *entry = (const comdat_entry *) p;
-
return htab_hash_string (entry->sig);
}
-static int
-comdat_eq (const void *p1, const void *p2)
+inline bool
+comdat_entry_hasher::equal (const value_type *entry1,
+ const compare_type *entry2)
{
- const comdat_entry *entry1 = (const comdat_entry *) p1;
- const comdat_entry *entry2 = (const comdat_entry *) p2;
-
return strcmp (entry1->sig, entry2->sig) == 0;
}
+/* Hash table of group signature symbols. */
+
+static hash_table <comdat_entry_hasher> solaris_comdat_htab;
+
/* Output assembly to switch to COMDAT group section NAME with attributes
FLAGS and group signature symbol DECL, using Sun as syntax. */
@@ -229,12 +234,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl)
     identify the missing ones without changing the affected frontends,
remember the signature symbols and emit those not marked
TREE_SYMBOL_REFERENCED in solaris_file_end. */
- if (solaris_comdat_htab == NULL)
- solaris_comdat_htab = htab_create_alloc (37, comdat_hash, comdat_eq, NULL,
- xcalloc, free);
+ if (!solaris_comdat_htab.is_created ())
+ solaris_comdat_htab.create (37);
entry.sig = signature;
- slot = (comdat_entry **) htab_find_slot (solaris_comdat_htab, &entry, INSERT);
+ slot = solaris_comdat_htab.find_slot (&entry, INSERT);
if (*slot == NULL)
{
@@ -250,10 +254,11 @@ solaris_elf_asm_comdat_section (const char *name, unsigned int flags, tree decl)
/* Define unreferenced COMDAT group signature symbol corresponding to SLOT. */
-static int
-solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED)
+int
+solaris_define_comdat_signature (comdat_entry **slot,
+ void *aux ATTRIBUTE_UNUSED)
{
- comdat_entry *entry = *(comdat_entry **) slot;
+ comdat_entry *entry = *slot;
tree decl = entry->decl;
if (TREE_CODE (decl) != IDENTIFIER_NODE)
@@ -277,10 +282,10 @@ solaris_define_comdat_signature (void **slot, void *aux ATTRIBUTE_UNUSED)
void
solaris_file_end (void)
{
- if (solaris_comdat_htab == NULL)
+ if (!solaris_comdat_htab.is_created ())
return;
- htab_traverse (solaris_comdat_htab, solaris_define_comdat_signature, NULL);
+ solaris_comdat_htab.traverse <void *, solaris_define_comdat_signature> (NULL);
}
void
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 1dc4e3600a8..d473d6fdd7f 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -11527,7 +11527,7 @@ sparc_expand_vec_perm_bmask (enum machine_mode vmode, rtx sel)
}
/* Always perform the final addition/merge within the bmask insn. */
- emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, t_1));
+ emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, t_1));
}
/* Implement TARGET_FRAME_POINTER_REQUIRED. */
@@ -11766,7 +11766,7 @@ static void
vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
enum machine_mode inner_mode)
{
- rtx t1, final_insn;
+ rtx t1, final_insn, sel;
int bmask;
t1 = gen_reg_rtx (mode);
@@ -11792,8 +11792,8 @@ vector_init_bshuffle (rtx target, rtx elt, enum machine_mode mode,
gcc_unreachable ();
}
- emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), CONST0_RTX (SImode),
- force_reg (SImode, GEN_INT (bmask))));
+ sel = force_reg (SImode, GEN_INT (bmask));
+ emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
emit_insn (final_insn);
}
diff --git a/gcc/config/sparc/sparc.md b/gcc/config/sparc/sparc.md
index 292cb205271..7f8d4250502 100644
--- a/gcc/config/sparc/sparc.md
+++ b/gcc/config/sparc/sparc.md
@@ -5499,7 +5499,7 @@
[(set (match_operand:DF 0 "register_operand" "=e")
(mult:DF (float_extend:DF (match_operand:SF 1 "register_operand" "f"))
(float_extend:DF (match_operand:SF 2 "register_operand" "f"))))]
- "(TARGET_V8 || TARGET_V9) && TARGET_FPU"
+ "(TARGET_V8 || TARGET_V9) && TARGET_FPU && !sparc_fix_ut699"
"fsmuld\t%1, %2, %0"
[(set_attr "type" "fpmul")
(set_attr "fptype" "double")])
@@ -5528,20 +5528,37 @@
"fdivq\t%1, %2, %0"
[(set_attr "type" "fpdivd")])
-(define_insn "divdf3"
+(define_expand "divdf3"
[(set (match_operand:DF 0 "register_operand" "=e")
(div:DF (match_operand:DF 1 "register_operand" "e")
(match_operand:DF 2 "register_operand" "e")))]
"TARGET_FPU"
+ "")
+
+(define_insn "*divdf3_nofix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (div:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU && !sparc_fix_ut699"
"fdivd\t%1, %2, %0"
[(set_attr "type" "fpdivd")
(set_attr "fptype" "double")])
+(define_insn "*divdf3_fix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (div:DF (match_operand:DF 1 "register_operand" "e")
+ (match_operand:DF 2 "register_operand" "e")))]
+ "TARGET_FPU && sparc_fix_ut699"
+ "fdivd\t%1, %2, %0\n\tstd\t%0, [%%sp-8]"
+ [(set_attr "type" "fpdivd")
+ (set_attr "fptype" "double")
+ (set_attr "length" "2")])
+
(define_insn "divsf3"
[(set (match_operand:SF 0 "register_operand" "=f")
(div:SF (match_operand:SF 1 "register_operand" "f")
(match_operand:SF 2 "register_operand" "f")))]
- "TARGET_FPU"
+ "TARGET_FPU && !sparc_fix_ut699"
"fdivs\t%1, %2, %0"
[(set_attr "type" "fpdivs")])
@@ -5742,18 +5759,33 @@
"fsqrtq\t%1, %0"
[(set_attr "type" "fpsqrtd")])
-(define_insn "sqrtdf2"
+(define_expand "sqrtdf2"
[(set (match_operand:DF 0 "register_operand" "=e")
(sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
"TARGET_FPU"
+ "")
+
+(define_insn "*sqrtdf2_nofix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && !sparc_fix_ut699"
"fsqrtd\t%1, %0"
[(set_attr "type" "fpsqrtd")
(set_attr "fptype" "double")])
+(define_insn "*sqrtdf2_fix"
+ [(set (match_operand:DF 0 "register_operand" "=e")
+ (sqrt:DF (match_operand:DF 1 "register_operand" "e")))]
+ "TARGET_FPU && sparc_fix_ut699"
+ "fsqrtd\t%1, %0\n\tstd\t%0, [%%sp-8]"
+ [(set_attr "type" "fpsqrtd")
+ (set_attr "fptype" "double")
+ (set_attr "length" "2")])
+
(define_insn "sqrtsf2"
[(set (match_operand:SF 0 "register_operand" "=f")
(sqrt:SF (match_operand:SF 1 "register_operand" "f")))]
- "TARGET_FPU"
+ "TARGET_FPU && !sparc_fix_ut699"
"fsqrts\t%1, %0"
[(set_attr "type" "fpsqrts")])
@@ -8557,7 +8589,7 @@
mask |= (INTVAL (XVECEXP (sel, 0, i)) & 0xf) << (28 - i*4);
sel = force_reg (SImode, gen_int_mode (mask, SImode));
- emit_insn (gen_bmasksi_vis (gen_reg_rtx (SImode), sel, const0_rtx));
+ emit_insn (gen_bmasksi_vis (gen_rtx_REG (SImode, 0), sel, const0_rtx));
emit_insn (gen_bshufflev8qi_vis (operands[0], operands[1], operands[2]));
DONE;
})
diff --git a/gcc/config/sparc/sparc.opt b/gcc/config/sparc/sparc.opt
index 764c652e837..3b50c6c21f9 100644
--- a/gcc/config/sparc/sparc.opt
+++ b/gcc/config/sparc/sparc.opt
@@ -201,6 +201,10 @@ Target Report RejectNegative Var(sparc_fix_at697f)
Enable workaround for single erratum of AT697F processor
(corresponding to erratum #13 of AT697E processor)
+mfix-ut699
+Target Report RejectNegative Var(sparc_fix_ut699)
+Enable workarounds for the FP errata of the UT699 processor
+
Mask(LONG_DOUBLE_128)
;; Use 128-bit long double
diff --git a/gcc/config/t-sol2 b/gcc/config/t-sol2
index 25b825017f6..142de89de95 100644
--- a/gcc/config/t-sol2
+++ b/gcc/config/t-sol2
@@ -34,5 +34,5 @@ sol2-stubs.o: $(srcdir)/config/sol2-stubs.c $(CONFIG_H) $(SYSTEM_H) coretypes.h
# Solaris-specific attributes
sol2.o: $(srcdir)/config/sol2.c $(CONFIG_H) $(SYSTEM_H) coretypes.h \
- tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H)
+ tree.h output.h $(TM_H) $(TARGET_H) $(TM_P_H) $(GGC_H) $(HASH_TABLE_H)
$(COMPILER) -c $(ALL_COMPILERFLAGS) $(ALL_CPPFLAGS) $(INCLUDES) $<