summaryrefslogtreecommitdiff
path: root/gcc/config/arm/neon.md
diff options
context:
space:
mode:
Diffstat (limited to 'gcc/config/arm/neon.md')
-rw-r--r--gcc/config/arm/neon.md799
1 files changed, 509 insertions, 290 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 43b3805c7ba..15f347c9607 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -22,17 +22,12 @@
(define_constants
[(UNSPEC_ASHIFT_SIGNED 65)
(UNSPEC_ASHIFT_UNSIGNED 66)
- (UNSPEC_VABA 67)
- (UNSPEC_VABAL 68)
(UNSPEC_VABD 69)
(UNSPEC_VABDL 70)
- (UNSPEC_VABS 71)
(UNSPEC_VADD 72)
(UNSPEC_VADDHN 73)
(UNSPEC_VADDL 74)
(UNSPEC_VADDW 75)
- (UNSPEC_VAND 76)
- (UNSPEC_VBIC 77)
(UNSPEC_VBSL 78)
(UNSPEC_VCAGE 79)
(UNSPEC_VCAGT 80)
@@ -40,18 +35,9 @@
(UNSPEC_VCGE 82)
(UNSPEC_VCGT 83)
(UNSPEC_VCLS 84)
- (UNSPEC_VCLZ 85)
- (UNSPEC_VCNT 86)
- (UNSPEC_VCOMBINE 87)
(UNSPEC_VCVT 88)
(UNSPEC_VCVT_N 89)
- (UNSPEC_VDUP_LANE 90)
- (UNSPEC_VDUP_N 91)
- (UNSPEC_VEOR 92)
(UNSPEC_VEXT 93)
- (UNSPEC_VGET_HIGH 94)
- (UNSPEC_VGET_LANE 95)
- (UNSPEC_VGET_LOW 96)
(UNSPEC_VHADD 97)
(UNSPEC_VHSUB 98)
(UNSPEC_VLD1 99)
@@ -86,10 +72,6 @@
(UNSPEC_VMULL 128)
(UNSPEC_VMUL_LANE 129)
(UNSPEC_VMULL_LANE 130)
- (UNSPEC_VMUL_N 131)
- (UNSPEC_VMVN 132)
- (UNSPEC_VORN 133)
- (UNSPEC_VORR 134)
(UNSPEC_VPADAL 135)
(UNSPEC_VPADD 136)
(UNSPEC_VPADDL 137)
@@ -125,7 +107,6 @@
(UNSPEC_VREV64 167)
(UNSPEC_VRSQRTE 168)
(UNSPEC_VRSQRTS 169)
- (UNSPEC_VSET_LANE 170)
(UNSPEC_VSHL 171)
(UNSPEC_VSHLL_N 172)
(UNSPEC_VSHL_N 173)
@@ -185,9 +166,6 @@
;; Opaque structure types wider than TImode.
(define_mode_iterator VSTRUCT [EI OI CI XI])
-;; Number of instructions needed to load/store struct elements. FIXME!
-(define_mode_attr V_slen [(EI "2") (OI "2") (CI "3") (XI "4")])
-
;; Opaque structure types used in table lookups (except vtbl1/vtbx1).
(define_mode_iterator VTAB [TI EI OI])
@@ -335,6 +313,14 @@
(V4HI "V2SI") (V8HI "V4SI")
(V2SI "DI") (V4SI "V2DI")])
+;; Doubleword (64-bit) vector modes with the same element size.
+;; Used for neon_vdup_lane, where the second operand is a doubleword
+;; vector even when the first one is a quadword vector.
+(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI")
+ (V4SI "V2SI") (V4SF "V2SF")
+ (V8QI "V8QI") (V4HI "V4HI")
+ (V2SI "V2SI") (V2SF "V2SF")])
+
;; Mode of result of comparison operations (and bit-select operand 1).
(define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI")
(V4HI "V4HI") (V8HI "V8HI")
@@ -587,7 +573,7 @@
}
}
[(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2")
- (set_attr "length" "<V_slen>,<V_slen>,<V_slen>")])
+ (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
(define_split
[(set (match_operand:EI 0 "s_register_operand" "")
@@ -688,7 +674,7 @@
elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
operands[2] = GEN_INT (elt);
- return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1";
+ return "vmov%?.<V_sz_elem>\t%P0[%c2], %1";
}
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_mcr")])
@@ -714,7 +700,7 @@
operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
operands[2] = GEN_INT (elt);
- return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1";
+ return "vmov%?.<V_sz_elem>\t%P0[%c2], %1";
}
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_mcr")]
@@ -734,7 +720,7 @@
operands[0] = gen_rtx_REG (DImode, regno);
- return "vmov%?.64\t%P0, %Q1, %R1";
+ return "vmov%?\t%P0, %Q1, %R1";
}
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_mcr_2_mcrr")]
@@ -802,11 +788,11 @@
(parallel [(match_operand:SI 2 "immediate_operand" "i")])))]
"TARGET_NEON"
{
- int regno = REGNO (operands[1]) + INTVAL (operands[2]);
+ int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
operands[1] = gen_rtx_REG (DImode, regno);
- return "vmov%?.64\t%Q0, %R0, %P1";
+ return "vmov%?\t%Q0, %R0, %P1 @ v2di";
}
[(set_attr "predicable" "yes")
(set_attr "neon_type" "neon_int_1")]
@@ -823,17 +809,14 @@
;; Doubleword and quadword arithmetic.
-;; NOTE: vadd/vsub and some other instructions also support 64-bit integer
-;; element size, which we could potentially use for "long long" operations. We
-;; don't want to do this at present though, because moving values from the
-;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is
-;; easy to do with ARM instructions anyway.
+;; NOTE: some other instructions also support 64-bit integer
+;; element size, which we could potentially use for "long long" operations.
(define_insn "*add<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -843,11 +826,31 @@
(const_string "neon_int_1")))]
)
+;; 64-bit (DImode) integer addition when NEON is available.
+;; Alternative 0 keeps all three operands in NEON registers ("w") and emits
+;; a single vadd.i64.  Alternatives 1 and 2 use core registers ("r") and
+;; output "#", so they depend on a splitter that is not part of this
+;; pattern to produce the final code; their length is given as 8 bytes.
+;; The pattern as a whole clobbers the condition-code register.
+(define_insn "adddi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vadd.i64\t%P0, %P1, %P2";
+ case 1: return "#";
+ case 2: return "#";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "conds" "*,clob,clob")
+ (set_attr "length" "*,8,8")]
+)
+
(define_insn "*sub<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -857,11 +860,32 @@
(const_string "neon_int_2")))]
)
+;; 64-bit (DImode) integer subtraction when NEON is available.
+;; Alternative 0 keeps all three operands in NEON registers ("w") and emits
+;; a single vsub.i64.  Alternatives 1-3 use core registers ("r") and emit
+;; the subs/sbc pair directly (low word first, then high word with borrow),
+;; which is why the condition codes are clobbered and the length is 8.
+(define_insn "subdi3_neon"
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r")
+ (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0")
+ (match_operand:DI 2 "s_register_operand" "w,r,0,0")))
+ (clobber (reg:CC CC_REGNUM))]
+ "TARGET_NEON"
+{
+ switch (which_alternative)
+ {
+ case 0: return "vsub.i64\t%P0, %P1, %P2";
+ case 1: /* fall through */
+ case 2: /* fall through */
+ case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
+ default: gcc_unreachable ();
+ }
+}
+ [(set_attr "neon_type" "neon_int_2,*,*,*")
+ (set_attr "conds" "*,clob,clob,clob")
+ (set_attr "length" "*,8,8,8")]
+)
+
(define_insn "*mul<mode>3_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
(match_operand:VDQ 2 "s_register_operand" "w")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -878,12 +902,12 @@
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
)
-(define_insn "*mul<mode>3add<mode>_neon"
+(define_insn "mul<mode>3add<mode>_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
(match_operand:VDQ 3 "s_register_operand" "w"))
(match_operand:VDQ 1 "s_register_operand" "0")))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -900,12 +924,12 @@
(const_string "neon_mla_qqq_32_qqd_32_scalar")))))]
)
-(define_insn "*mul<mode>3neg<mode>add<mode>_neon"
+(define_insn "mul<mode>3neg<mode>add<mode>_neon"
[(set (match_operand:VDQ 0 "s_register_operand" "=w")
(minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0")
(mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w")
(match_operand:VDQ 3 "s_register_operand" "w"))))]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
(if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0))
@@ -940,10 +964,9 @@
)
(define_insn "iordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0")
- (match_operand:DI 2 "neon_logic_op2" "w,Dl")]
- UNSPEC_VORR))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r")
+ (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))]
"TARGET_NEON"
{
switch (which_alternative)
@@ -951,10 +974,13 @@
case 0: return "vorr\t%P0, %P1, %P2";
case 1: return neon_output_logic_immediate ("vorr", &operands[2],
DImode, 0, VALID_NEON_QREG_MODE (DImode));
+ case 2: return "#";
+ case 3: return "#";
default: gcc_unreachable ();
}
}
- [(set_attr "neon_type" "neon_int_1")]
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*")
+ (set_attr "length" "*,*,8,8")]
)
;; The concrete forms of the Neon immediate-logic instructions are vbic and
@@ -980,10 +1006,9 @@
)
(define_insn "anddi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w,w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0")
- (match_operand:DI 2 "neon_inv_logic_op2" "w,DL")]
- UNSPEC_VAND))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r")
+ (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r")
+ (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))]
"TARGET_NEON"
{
switch (which_alternative)
@@ -991,10 +1016,13 @@
case 0: return "vand\t%P0, %P1, %P2";
case 1: return neon_output_logic_immediate ("vand", &operands[2],
DImode, 1, VALID_NEON_QREG_MODE (DImode));
+ case 2: return "#";
+ case 3: return "#";
default: gcc_unreachable ();
}
}
- [(set_attr "neon_type" "neon_int_1")]
+ [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*")
+ (set_attr "length" "*,*,8,8")]
)
(define_insn "orn<mode>3_neon"
@@ -1007,13 +1035,16 @@
)
+;; 64-bit OR-NOT: operand 0 = operand 1 | ~operand 2.
+;; Alternative 0 works on NEON registers and emits vorn.  Alternatives 1
+;; and 2 use core registers and output "#", so they depend on a splitter
+;; that is not part of this pattern; their length is given as 8 bytes.
+;; The '=' modifier applies to the whole operand and is written once, at
+;; the start of the constraint string; later alternatives do not repeat it.
(define_insn "orndi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:DI 2 "s_register_operand" "w")]
- UNSPEC_VORN))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0")
+ (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))]
"TARGET_NEON"
- "vorn\t%P0, %P1, %P2"
- [(set_attr "neon_type" "neon_int_1")]
+ "@
+ vorn\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
)
(define_insn "bic<mode>3_neon"
@@ -1025,14 +1056,18 @@
[(set_attr "neon_type" "neon_int_1")]
)
+;; 64-bit bit-clear: operand 0 = operand 1 & ~operand 2.
+;; Alternative 0 works on NEON registers and emits vbic.  Alternatives 1
+;; and 2 use core registers and output "#", so they depend on a splitter
+;; that is not part of this pattern; their length is given as 8 bytes.
+;; The '=' modifier applies to the whole operand and is written once, at
+;; the start of the constraint string; later alternatives do not repeat it.
+;; Compare to *anddi_notdi_di.
(define_insn "bicdi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:DI 2 "s_register_operand" "w")]
- UNSPEC_VBIC))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0"))
+ (match_operand:DI 1 "s_register_operand" "w,0,r")))]
"TARGET_NEON"
- "vbic\t%P0, %P1, %P2"
- [(set_attr "neon_type" "neon_int_1")]
+ "@
+ vbic\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
)
(define_insn "xor<mode>3"
@@ -1045,13 +1080,16 @@
)
+;; 64-bit exclusive OR, expressed as a plain xor:DI rather than an unspec.
+;; Alternative 0 works on NEON registers and emits veor.  Alternatives 1
+;; and 2 use core registers and output "#", so they depend on a splitter
+;; that is not part of this pattern; their length is given as 8 bytes.
(define_insn "xordi3_neon"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:DI 2 "s_register_operand" "w")]
- UNSPEC_VEOR))]
+ [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r")
+ (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r")
+ (match_operand:DI 2 "s_register_operand" "w,r,r")))]
"TARGET_NEON"
- "veor\t%P0, %P1, %P2"
- [(set_attr "neon_type" "neon_int_1")]
+ "@
+ veor\t%P0, %P1, %P2
+ #
+ #"
+ [(set_attr "neon_type" "neon_int_1,*,*")
+ (set_attr "length" "*,8,8")]
)
(define_insn "one_cmpl<mode>2"
@@ -1320,7 +1358,7 @@
(parallel [(const_int 0) (const_int 1)]))
(vec_select:V2SF (match_dup 1)
(parallel [(const_int 2) (const_int 3)]))))]
- "TARGET_NEON"
+ "TARGET_NEON && flag_unsafe_math_optimizations"
"<VQH_mnem>.f32\t%P0, %e1, %f1"
[(set_attr "vqh_mnem" "<VQH_mnem>")
(set (attr "neon_type")
@@ -1455,7 +1493,7 @@
(define_expand "reduc_splus_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpadd_internal<mode>);
@@ -1465,7 +1503,7 @@
(define_expand "reduc_splus_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1500,7 +1538,7 @@
(define_expand "reduc_smin_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpsmin<mode>);
@@ -1510,7 +1548,7 @@
(define_expand "reduc_smin_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1525,7 +1563,7 @@
(define_expand "reduc_smax_<mode>"
[(match_operand:VD 0 "s_register_operand" "")
(match_operand:VD 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
neon_pairwise_reduce (operands[0], operands[1], <MODE>mode,
&gen_neon_vpsmax<mode>);
@@ -1535,7 +1573,7 @@
(define_expand "reduc_smax_<mode>"
[(match_operand:VQ 0 "s_register_operand" "")
(match_operand:VQ 1 "s_register_operand" "")]
- "TARGET_NEON"
+ "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
{
rtx step1 = gen_reg_rtx (<V_HALF>mode);
rtx res_d = gen_reg_rtx (<V_HALF>mode);
@@ -1711,11 +1749,37 @@
; good for plain vadd, vaddq.
-(define_insn "neon_vadd<mode>"
+; Expander for neon_vadd<mode>.  Operand 3 is accepted but is not passed
+; on to either generated insn.  For integer modes, or for float modes when
+; flag_unsafe_math_optimizations is set, this emits the canonical
+; add<mode>3; otherwise it emits neon_vadd<mode>_unspec.
+(define_expand "neon_vadd<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Note that NEON operations don't support the full IEEE 754 standard: in
+; particular, denormal values are flushed to zero. This means that GCC cannot
+; use those instructions for autovectorization, etc. unless
+; -funsafe-math-optimizations is in effect (in which case flush-to-zero
+; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h
+; header) must work in either case: if -funsafe-math-optimizations is given,
+; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
+; expand to unspecs (which may potentially limit the extent to which they might
+; be optimized by generic code).
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vadd<mode>_unspec"
[(set (match_operand:VDQX 0 "s_register_operand" "=w")
(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
- (match_operand:VDQX 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
+ (match_operand:VDQX 2 "s_register_operand" "w")]
UNSPEC_VADD))]
"TARGET_NEON"
"vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -1788,6 +1852,8 @@
[(set_attr "neon_type" "neon_int_4")]
)
+;; We cannot replace this unspec with mul<mode>3 because of the odd
+;; polynomial multiplication case that can be specified by operand 3.
(define_insn "neon_vmul<mode>"
[(set (match_operand:VDQW 0 "s_register_operand" "=w")
(unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
@@ -1811,13 +1877,31 @@
(const_string "neon_mul_qqq_8_16_32_ddd_32")))))]
)
-(define_insn "neon_vmla<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:VDQW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VMLA))]
+; Expander for neon_vmla<mode> (multiply-accumulate).  Operand 1 is the
+; accumulator, operands 2 and 3 are the multiplicands; operand 4 is
+; accepted but is not passed on to either generated insn.  For integer
+; modes, or for float modes when flag_unsafe_math_optimizations is set,
+; this emits mul<mode>3add<mode>_neon; otherwise it emits
+; neon_vmla<mode>_unspec.
+(define_expand "neon_vmla<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
+ operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmla<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLA))]
"TARGET_NEON"
"vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -1850,13 +1934,31 @@
(const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))]
)
-(define_insn "neon_vmls<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
- (match_operand:VDQW 2 "s_register_operand" "w")
- (match_operand:VDQW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VMLS))]
+; Expander for neon_vmls<mode> (multiply-subtract).  Operand 1 is the
+; value subtracted from, operands 2 and 3 are the multiplicands; operand 4
+; is accepted but is not passed on to either generated insn.  For integer
+; modes, or for float modes when flag_unsafe_math_optimizations is set,
+; this emits mul<mode>3neg<mode>add<mode>_neon; otherwise it emits
+; neon_vmls<mode>_unspec.
+(define_expand "neon_vmls<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:VDQW 1 "s_register_operand" "0")
+ (match_operand:VDQW 2 "s_register_operand" "w")
+ (match_operand:VDQW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
+ operands[2], operands[3]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vmls<mode>_unspec"
+ [(set (match_operand:VDQ 0 "s_register_operand" "=w")
+ (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0")
+ (match_operand:VDQ 2 "s_register_operand" "w")
+ (match_operand:VDQ 3 "s_register_operand" "w")]
+ UNSPEC_VMLS))]
"TARGET_NEON"
"vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -1966,11 +2068,27 @@
(const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))]
)
-(define_insn "neon_vsub<mode>"
+; Expander for neon_vsub<mode>.  Operand 3 is accepted but is not passed
+; on to either generated insn.  For integer modes, or for float modes when
+; flag_unsafe_math_optimizations is set, this emits the canonical
+; sub<mode>3; otherwise it emits neon_vsub<mode>_unspec.
+(define_expand "neon_vsub<mode>"
+ [(match_operand:VDQX 0 "s_register_operand" "=w")
+ (match_operand:VDQX 1 "s_register_operand" "w")
+ (match_operand:VDQX 2 "s_register_operand" "w")
+ (match_operand:SI 3 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ if (!<Is_float_mode> || flag_unsafe_math_optimizations)
+ emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
+ else
+ emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
+
+; Used for intrinsics when flag_unsafe_math_optimizations is false.
+
+(define_insn "neon_vsub<mode>_unspec"
[(set (match_operand:VDQX 0 "s_register_operand" "=w")
(unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
- (match_operand:VDQX 2 "s_register_operand" "w")
- (match_operand:SI 3 "immediate_operand" "i")]
+ (match_operand:VDQX 2 "s_register_operand" "w")]
UNSPEC_VSUB))]
"TARGET_NEON"
"vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
@@ -2153,11 +2271,11 @@
(define_insn "neon_vaba<mode>"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "0")
- (match_operand:VDQIW 2 "s_register_operand" "w")
- (match_operand:VDQIW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VABA))]
+ (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0")
+ (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
+ (match_operand:VDQIW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABD)))]
"TARGET_NEON"
"vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
[(set (attr "neon_type")
@@ -2167,11 +2285,11 @@
(define_insn "neon_vabal<mode>"
[(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
- (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
- (match_operand:VW 2 "s_register_operand" "w")
- (match_operand:VW 3 "s_register_operand" "w")
- (match_operand:SI 4 "immediate_operand" "i")]
- UNSPEC_VABAL))]
+ (plus:<V_widen> (match_operand:<V_widen> 1 "s_register_operand" "0")
+ (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
+ (match_operand:VW 3 "s_register_operand" "w")
+ (match_operand:SI 4 "immediate_operand" "i")]
+ UNSPEC_VABDL)))]
"TARGET_NEON"
"vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3"
[(set_attr "neon_type" "neon_vaba")]
@@ -2302,22 +2420,15 @@
(const_string "neon_fp_vrecps_vrsqrts_qqq")))]
)
-(define_insn "neon_vabs<mode>"
- [(set (match_operand:VDQW 0 "s_register_operand" "=w")
- (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VABS))]
+(define_expand "neon_vabs<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "")
+ (match_operand:VDQW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
"TARGET_NEON"
- "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
- [(set (attr "neon_type")
- (if_then_else (ior (ne (symbol_ref "<Is_float_mode>") (const_int 0))
- (ne (symbol_ref "<Is_float_mode>") (const_int 0)))
- (if_then_else
- (ne (symbol_ref "<Is_d_reg>") (const_int 0))
- (const_string "neon_fp_vadd_ddd_vabs_dd")
- (const_string "neon_fp_vadd_qqq_vabs_qq"))
- (const_string "neon_vqneg_vqabs")))]
-)
+{
+ emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
+ DONE;
+})
(define_insn "neon_vqabs<mode>"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
@@ -2359,26 +2470,42 @@
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "neon_vclz<mode>"
+(define_insn "clz<mode>2"
[(set (match_operand:VDQIW 0 "s_register_operand" "=w")
- (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VCLZ))]
+ (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
[(set_attr "neon_type" "neon_int_1")]
)
-(define_insn "neon_vcnt<mode>"
+; Expander for neon_vclz<mode>: forwards operands 0 and 1 to clz<mode>2.
+; Operand 2 is accepted but not used.
+(define_expand "neon_vclz<mode>"
+ [(match_operand:VDQIW 0 "s_register_operand" "")
+ (match_operand:VDQIW 1 "s_register_operand" "")
+ (match_operand:SI 2 "immediate_operand" "")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
+(define_insn "popcount<mode>2"
[(set (match_operand:VE 0 "s_register_operand" "=w")
- (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VCNT))]
+ (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
"TARGET_NEON"
"vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
[(set_attr "neon_type" "neon_int_1")]
)
+; Expander for neon_vcnt<mode>: forwards operands 0 and 1 to
+; popcount<mode>2.  Operand 2 is accepted but not used.
+(define_expand "neon_vcnt<mode>"
+ [(match_operand:VE 0 "s_register_operand" "=w")
+ (match_operand:VE 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
+ "TARGET_NEON"
+{
+ emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
+ DONE;
+})
+
(define_insn "neon_vrecpe<mode>"
[(set (match_operand:V32 0 "s_register_operand" "=w")
(unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")
@@ -2555,126 +2682,65 @@
; Operand 3 (info word) is ignored because it does nothing useful with 64-bit
; elements.
-(define_insn "neon_vget_lanedi"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VGET_LANE))]
+(define_expand "neon_vget_lanedi"
+ [(match_operand:DI 0 "s_register_operand" "=r")
+ (match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
"TARGET_NEON"
{
neon_lane_bounds (operands[2], 0, 1);
- return "vmov%?\t%Q0, %R0, %P1 @ di";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
-(define_insn "neon_vget_lanev2di"
- [(set (match_operand:DI 0 "s_register_operand" "=r")
- (unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VGET_LANE))]
+(define_expand "neon_vget_lanev2di"
+ [(match_operand:DI 0 "s_register_operand" "=r")
+ (match_operand:V2DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")
+ (match_operand:SI 3 "immediate_operand" "i")]
"TARGET_NEON"
{
- rtx ops[2];
- unsigned int regno = REGNO (operands[1]);
- unsigned int elt = INTVAL (operands[2]);
-
neon_lane_bounds (operands[2], 0, 2);
+ emit_insn (gen_vec_extractv2di (operands[0], operands[1], operands[2]));
+ DONE;
+})
- ops[0] = operands[0];
- ops[1] = gen_rtx_REG (DImode, regno + 2 * elt);
- output_asm_insn ("vmov%?\t%Q0, %R0, %P1 @ v2di", ops);
-
- return "";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vset_lane<mode>"
- [(set (match_operand:VD 0 "s_register_operand" "=w")
- (unspec:VD [(match_operand:<V_elem> 1 "s_register_operand" "r")
- (match_operand:VD 2 "s_register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VSET_LANE))]
- "TARGET_NEON"
-{
- neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
- return "vmov%?.<V_sz_elem>\t%P0[%c3], %1";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
-
-; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
-
-(define_insn "neon_vset_lanedi"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")
- (match_operand:DI 2 "s_register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VSET_LANE))]
- "TARGET_NEON"
-{
- neon_lane_bounds (operands[3], 0, 1);
- return "vmov%?\t%P0, %Q1, %R1 @ di";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
-
-(define_insn "neon_vset_lane<mode>"
- [(set (match_operand:VQ 0 "s_register_operand" "=w")
- (unspec:VQ [(match_operand:<V_elem> 1 "s_register_operand" "r")
- (match_operand:VQ 2 "s_register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VSET_LANE))]
+(define_expand "neon_vset_lane<mode>"
+ [(match_operand:VDQ 0 "s_register_operand" "=w")
+ (match_operand:<V_elem> 1 "s_register_operand" "r")
+ (match_operand:VDQ 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
"TARGET_NEON"
{
- rtx ops[4];
- unsigned int regno = REGNO (operands[0]);
- unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
unsigned int elt = INTVAL (operands[3]);
+ neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
- neon_lane_bounds (operands[3], 0, halfelts * 2);
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int reg_nelts
+ = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode));
+ elt ^= reg_nelts - 1;
+ }
- ops[0] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
- ops[1] = operands[1];
- ops[2] = GEN_INT (elt % halfelts);
- output_asm_insn ("vmov%?.<V_sz_elem>\t%P0[%c2], %1", ops);
+ emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
+ GEN_INT (1 << elt), operands[2]));
+ DONE;
+})
- return "";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
+; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
-(define_insn "neon_vset_lanev2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=w")
- (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "r")
- (match_operand:V2DI 2 "s_register_operand" "0")
- (match_operand:SI 3 "immediate_operand" "i")]
- UNSPEC_VSET_LANE))]
+(define_expand "neon_vset_lanedi"
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "r")
+ (match_operand:DI 2 "s_register_operand" "0")
+ (match_operand:SI 3 "immediate_operand" "i")]
"TARGET_NEON"
{
- rtx ops[2];
- unsigned int regno = REGNO (operands[0]);
- unsigned int elt = INTVAL (operands[3]);
-
- neon_lane_bounds (operands[3], 0, 2);
-
- ops[0] = gen_rtx_REG (DImode, regno + 2 * elt);
- ops[1] = operands[1];
- output_asm_insn ("vmov%?\t%P0, %Q1, %R1 @ v2di", ops);
-
- return "";
-}
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
-)
+ neon_lane_bounds (operands[3], 0, 1);
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+})
(define_expand "neon_vcreate<mode>"
[(match_operand:VDX 0 "s_register_operand" "")
@@ -2688,8 +2754,7 @@
(define_insn "neon_vdup_n<mode>"
[(set (match_operand:VX 0 "s_register_operand" "=w")
- (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")]
- UNSPEC_VDUP_N))]
+ (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
"TARGET_NEON"
"vdup%?.<V_sz_elem>\t%<V_reg>0, %1"
;; Assume this schedules like vmov.
@@ -2699,8 +2764,7 @@
(define_insn "neon_vdup_n<mode>"
[(set (match_operand:V32 0 "s_register_operand" "=w,w")
- (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")]
- UNSPEC_VDUP_N))]
+ (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
"TARGET_NEON"
"@
vdup%?.<V_sz_elem>\t%<V_reg>0, %1
@@ -2710,61 +2774,76 @@
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vdup_ndi"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")]
- UNSPEC_VDUP_N))]
+(define_expand "neon_vdup_ndi"
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "r")]
"TARGET_NEON"
- "vmov%?\t%P0, %Q1, %R1"
- [(set_attr "predicable" "yes")
- (set_attr "neon_type" "neon_bp_simple")]
+{
+ emit_move_insn (operands[0], operands[1]);
+ DONE;
+}
)
(define_insn "neon_vdup_nv2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=w")
- (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "r")]
- UNSPEC_VDUP_N))]
+ [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
+ (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
"TARGET_NEON"
- "vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1"
+ "@
+ vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1
+ vmov%?\t%e0, %P1\;vmov%?\t%f0, %P1"
[(set_attr "predicable" "yes")
(set_attr "length" "8")
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vdup_lane<mode>"
- [(set (match_operand:VD 0 "s_register_operand" "=w")
- (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VDUP_LANE))]
+(define_insn "neon_vdup_lane<mode>_internal"
+ [(set (match_operand:VDQW 0 "s_register_operand" "=w")
+ (vec_duplicate:VDQW
+ (vec_select:<V_elem>
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+ (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode));
- return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+ if (BYTES_BIG_ENDIAN)
+ {
+ int elt = INTVAL (operands[2]);
+ elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
+ operands[2] = GEN_INT (elt);
+ }
+ if (<Is_d_reg>)
+ return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
+ else
+ return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
}
;; Assume this schedules like vmov.
[(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vdup_lane<mode>"
- [(set (match_operand:VQ 0 "s_register_operand" "=w")
- (unspec:VQ [(match_operand:<V_HALF> 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VDUP_LANE))]
+(define_expand "neon_vdup_lane<mode>"
+ [(match_operand:VDQW 0 "s_register_operand" "=w")
+ (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
"TARGET_NEON"
{
- neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_HALF>mode));
- return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
-}
- ;; Assume this schedules like vmov.
- [(set_attr "neon_type" "neon_bp_simple")]
-)
+ neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode));
+ if (BYTES_BIG_ENDIAN)
+ {
+ unsigned int elt = INTVAL (operands[2]);
+ unsigned int reg_nelts
+ = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode));
+ elt ^= reg_nelts - 1;
+ operands[2] = GEN_INT (elt);
+ }
+ emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
+ operands[2]));
+ DONE;
+})
; Scalar index is ignored, since only zero is valid here.
(define_expand "neon_vdup_lanedi"
- [(set (match_operand:DI 0 "s_register_operand" "=w")
- (unspec:DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VDUP_LANE))]
+ [(match_operand:DI 0 "s_register_operand" "=w")
+ (match_operand:DI 1 "s_register_operand" "w")
+ (match_operand:SI 2 "immediate_operand" "i")]
"TARGET_NEON"
{
neon_lane_bounds (operands[2], 0, 1);
@@ -2772,20 +2851,17 @@
DONE;
})
-; Likewise.
-(define_insn "neon_vdup_lanev2di"
- [(set (match_operand:V2DI 0 "s_register_operand" "=w")
- (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "w")
- (match_operand:SI 2 "immediate_operand" "i")]
- UNSPEC_VDUP_LANE))]
+; Likewise for v2di, as the DImode second operand has only a single element.
+(define_expand "neon_vdup_lanev2di"  ;; Expander: checks operand 2, then emits neon_vdup_nv2di.
+ [(match_operand:V2DI 0 "s_register_operand" "=w")  ;; Operand 0: V2DI destination.
+ (match_operand:DI 1 "s_register_operand" "w")  ;; Operand 1: DI source value.
+ (match_operand:SI 2 "immediate_operand" "i")]  ;; Operand 2: immediate lane index.
"TARGET_NEON"
{
 neon_lane_bounds (operands[2], 0, 1);
- return "vmov\t%e0, %P1\;vmov\t%f0, %P1";
-}
- [(set_attr "length" "8")
- (set_attr "neon_type" "neon_bp_simple")]
-)
+ emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));  /* Operand 2 is not passed on.  */
+ DONE;  /* The emit_insn above is the whole expansion.  */
+})
;; In this insn, operand 1 should be low, and operand 2 the high part of the
;; dest vector.
@@ -2796,9 +2872,8 @@
(define_insn "neon_vcombine<mode>"
[(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
- (unspec:<V_DOUBLE> [(match_operand:VDX 1 "s_register_operand" "w")
- (match_operand:VDX 2 "s_register_operand" "w")]
- UNSPEC_VCOMBINE))]
+ (vec_concat:<V_DOUBLE> (match_operand:VDX 1 "s_register_operand" "w")
+ (match_operand:VDX 2 "s_register_operand" "w")))]
"TARGET_NEON"
{
int dest = REGNO (operands[0]);
@@ -2838,10 +2913,31 @@
(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vget_high<mode>"
- [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w")
- (unspec:<V_HALF> [(match_operand:VQX 1 "s_register_operand" "w")]
- UNSPEC_VGET_HIGH))]
+(define_insn "neon_vget_highv16qi"  ;; Selects elements 8-15 of a V16QI into a V8QI.
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 8) (const_int 9)
+ (const_int 10) (const_int 11)
+ (const_int 12) (const_int 13)
+ (const_int 14) (const_int 15)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)  /* NOTE(review): src + 2 is presumably the regno of the source's upper half -- confirm.  */
+ return "vmov\t%P0, %f1";  /* A register copy is needed.  */
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv8hi"
+ [(set (match_operand:V4HI 0 "s_register_operand" "=w")
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
"TARGET_NEON"
{
int dest = REGNO (operands[0]);
@@ -2855,10 +2951,133 @@
[(set_attr "neon_type" "neon_bp_simple")]
)
-(define_insn "neon_vget_low<mode>"
- [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w")
- (unspec:<V_HALF> [(match_operand:VQX 1 "s_register_operand" "w")]
- UNSPEC_VGET_LOW))]
+(define_insn "neon_vget_highv4si"  ;; Selects elements 2-3 of a V4SI into a V2SI.
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)  /* NOTE(review): src + 2 is presumably the regno of the source's upper half -- confirm.  */
+ return "vmov\t%P0, %f1";  /* A register copy is needed.  */
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv4sf"  ;; Selects elements 2-3 of a V4SF into a V2SF.
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)  /* NOTE(review): src + 2 is presumably the regno of the source's upper half -- confirm.  */
+ return "vmov\t%P0, %f1";  /* A register copy is needed.  */
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_highv2di"  ;; Selects element 1 of a V2DI into a DI.
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src + 2)  /* NOTE(review): src + 2 is presumably the regno of the source's upper half -- confirm.  */
+ return "vmov\t%P0, %f1";  /* A register copy is needed.  */
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv16qi"  ;; Selects elements 0-7 of a V16QI into a V8QI.
+ [(set (match_operand:V8QI 0 "s_register_operand" "=w")
+ (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)
+ (const_int 4) (const_int 5)
+ (const_int 6) (const_int 7)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)  /* Register numbers differ, so a copy is needed.  */
+ return "vmov\t%P0, %e1";
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv8hi"  ;; Selects elements 0-3 of a V8HI into a V4HI.
+ [(set (match_operand:V4HI 0 "s_register_operand" "=w")
+ (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)
+ (const_int 2) (const_int 3)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)  /* Register numbers differ, so a copy is needed.  */
+ return "vmov\t%P0, %e1";
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv4si"  ;; Selects elements 0-1 of a V4SI into a V2SI.
+ [(set (match_operand:V2SI 0 "s_register_operand" "=w")
+ (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)  /* Register numbers differ, so a copy is needed.  */
+ return "vmov\t%P0, %e1";
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv4sf"  ;; Selects elements 0-1 of a V4SF into a V2SF.
+ [(set (match_operand:V2SF 0 "s_register_operand" "=w")
+ (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
+ (parallel [(const_int 0) (const_int 1)])))]
+ "TARGET_NEON"
+{
+ int dest = REGNO (operands[0]);
+ int src = REGNO (operands[1]);
+
+ if (dest != src)  /* Register numbers differ, so a copy is needed.  */
+ return "vmov\t%P0, %e1";
+ else
+ return "";  /* Empty template: no instruction text is output.  */
+}
+ [(set_attr "neon_type" "neon_bp_simple")]
+)
+
+(define_insn "neon_vget_lowv2di"
+ [(set (match_operand:DI 0 "s_register_operand" "=w")
+ (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w")
+ (parallel [(const_int 0)])))]
"TARGET_NEON"
{
int dest = REGNO (operands[0]);