diff options
author | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-07-23 19:37:40 +0000 |
---|---|---|
committer | hjl <hjl@138bc75d-0d04-0410-961f-82ee72b054a4> | 2010-07-23 19:37:40 +0000 |
commit | 10ada81fea4490f94ba2eb5923bf5baa367a38bd (patch) | |
tree | 437dca120093cc7b1f6debf6f6b31779526c7192 /gcc/config/arm/neon.md | |
parent | 95a236de8aa10bf009e9368dfd28f95a980e5570 (diff) | |
parent | 3bd7a983695352a99f7dd597725eb5b839d4b4cf (diff) | |
download | gcc-ifunc.tar.gz |
Merged with trunk at revision 162480.ifunc
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/branches/ifunc@162483 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'gcc/config/arm/neon.md')
-rw-r--r-- | gcc/config/arm/neon.md | 799 |
1 files changed, 509 insertions, 290 deletions
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 43b3805c7ba..15f347c9607 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -22,17 +22,12 @@ (define_constants [(UNSPEC_ASHIFT_SIGNED 65) (UNSPEC_ASHIFT_UNSIGNED 66) - (UNSPEC_VABA 67) - (UNSPEC_VABAL 68) (UNSPEC_VABD 69) (UNSPEC_VABDL 70) - (UNSPEC_VABS 71) (UNSPEC_VADD 72) (UNSPEC_VADDHN 73) (UNSPEC_VADDL 74) (UNSPEC_VADDW 75) - (UNSPEC_VAND 76) - (UNSPEC_VBIC 77) (UNSPEC_VBSL 78) (UNSPEC_VCAGE 79) (UNSPEC_VCAGT 80) @@ -40,18 +35,9 @@ (UNSPEC_VCGE 82) (UNSPEC_VCGT 83) (UNSPEC_VCLS 84) - (UNSPEC_VCLZ 85) - (UNSPEC_VCNT 86) - (UNSPEC_VCOMBINE 87) (UNSPEC_VCVT 88) (UNSPEC_VCVT_N 89) - (UNSPEC_VDUP_LANE 90) - (UNSPEC_VDUP_N 91) - (UNSPEC_VEOR 92) (UNSPEC_VEXT 93) - (UNSPEC_VGET_HIGH 94) - (UNSPEC_VGET_LANE 95) - (UNSPEC_VGET_LOW 96) (UNSPEC_VHADD 97) (UNSPEC_VHSUB 98) (UNSPEC_VLD1 99) @@ -86,10 +72,6 @@ (UNSPEC_VMULL 128) (UNSPEC_VMUL_LANE 129) (UNSPEC_VMULL_LANE 130) - (UNSPEC_VMUL_N 131) - (UNSPEC_VMVN 132) - (UNSPEC_VORN 133) - (UNSPEC_VORR 134) (UNSPEC_VPADAL 135) (UNSPEC_VPADD 136) (UNSPEC_VPADDL 137) @@ -125,7 +107,6 @@ (UNSPEC_VREV64 167) (UNSPEC_VRSQRTE 168) (UNSPEC_VRSQRTS 169) - (UNSPEC_VSET_LANE 170) (UNSPEC_VSHL 171) (UNSPEC_VSHLL_N 172) (UNSPEC_VSHL_N 173) @@ -185,9 +166,6 @@ ;; Opaque structure types wider than TImode. (define_mode_iterator VSTRUCT [EI OI CI XI]) -;; Number of instructions needed to load/store struct elements. FIXME! -(define_mode_attr V_slen [(EI "2") (OI "2") (CI "3") (XI "4")]) - ;; Opaque structure types used in table lookups (except vtbl1/vtbx1). (define_mode_iterator VTAB [TI EI OI]) @@ -335,6 +313,14 @@ (V4HI "V2SI") (V8HI "V4SI") (V2SI "DI") (V4SI "V2DI")]) +;; Double-sized modes with the same element size. +;; Used for neon_vdup_lane, where the second operand is double-sized +;; even when the first one is quad. +(define_mode_attr V_double_vector_mode [(V16QI "V8QI") (V8HI "V4HI") + (V4SI "V2SI") (V4SF "V2SF") + (V8QI "V8QI") (V4HI "V4HI") + (V2SI "V2SI") (V2SF "V2SF")]) + ;; Mode of result of comparison operations (and bit-select operand 1). (define_mode_attr V_cmp_result [(V8QI "V8QI") (V16QI "V16QI") (V4HI "V4HI") (V8HI "V8HI") @@ -587,7 +573,7 @@ } } [(set_attr "neon_type" "neon_int_1,neon_stm_2,neon_ldm_2") - (set_attr "length" "<V_slen>,<V_slen>,<V_slen>")]) + (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))]) (define_split [(set (match_operand:EI 0 "s_register_operand" "") @@ -688,7 +674,7 @@ elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt; operands[2] = GEN_INT (elt); - return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1"; + return "vmov%?.<V_sz_elem>\t%P0[%c2], %1"; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_mcr")]) @@ -714,7 +700,7 @@ operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi); operands[2] = GEN_INT (elt); - return "vmov%?.<V_uf_sclr>\t%P0[%c2], %1"; + return "vmov%?.<V_sz_elem>\t%P0[%c2], %1"; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_mcr")] @@ -734,7 +720,7 @@ operands[0] = gen_rtx_REG (DImode, regno); - return "vmov%?.64\t%P0, %Q1, %R1"; + return "vmov%?\t%P0, %Q1, %R1"; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_mcr_2_mcrr")] @@ -802,11 +788,11 @@ (parallel [(match_operand:SI 2 "immediate_operand" "i")])))] "TARGET_NEON" { - int regno = REGNO (operands[1]) + INTVAL (operands[2]); + int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]); operands[1] = gen_rtx_REG (DImode, regno); - return "vmov%?.64\t%Q0, %R0, %P1"; + return "vmov%?\t%Q0, %R0, %P1 @ v2di"; } [(set_attr "predicable" "yes") (set_attr "neon_type" "neon_int_1")] @@ -823,17 +809,14 @@ ;; Doubleword and quadword arithmetic. -;; NOTE: vadd/vsub and some other instructions also support 64-bit integer -;; element size, which we could potentially use for "long long" operations. We -;; don't want to do this at present though, because moving values from the -;; vector unit to the ARM core is currently slow and 64-bit addition (etc.) is -;; easy to do with ARM instructions anyway. +;; NOTE: some other instructions also support 64-bit integer +;; element size, which we could potentially use for "long long" operations. (define_insn "*add<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) @@ -843,11 +826,31 @@ (const_string "neon_int_1")))] ) +(define_insn "adddi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") + (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0") + (match_operand:DI 2 "s_register_operand" "w,r,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vadd.i64\t%P0, %P1, %P2"; + case 1: return "#"; + case 2: return "#"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "conds" "*,clob,clob") + (set_attr "length" "*,8,8")] +) + (define_insn "*sub<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) @@ -857,11 +860,32 @@ (const_string "neon_int_2")))] ) +(define_insn "subdi3_neon" + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r") + (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0") + (match_operand:DI 2 "s_register_operand" "w,r,0,0"))) + (clobber (reg:CC CC_REGNUM))] + "TARGET_NEON" +{ + switch (which_alternative) + { + case 0: return "vsub.i64\t%P0, %P1, %P2"; + case 1: /* fall through */ + case 2: /* fall through */ + case 3: return "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2"; + default: gcc_unreachable (); + } +} + [(set_attr "neon_type" "neon_int_2,*,*,*") + (set_attr "conds" "*,clob,clob,clob") + (set_attr "length" "*,8,8,8")] +) + (define_insn "*mul<mode>3_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (mult:VDQ (match_operand:VDQ 1 "s_register_operand" "w") (match_operand:VDQ 2 "s_register_operand" "w")))] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) @@ -878,12 +902,12 @@ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] ) -(define_insn "*mul<mode>3add<mode>_neon" +(define_insn "mul<mode>3add<mode>_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (plus:VDQ (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") (match_operand:VDQ 3 "s_register_operand" "w")) (match_operand:VDQ 1 "s_register_operand" "0")))] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) @@ -900,12 +924,12 @@ (const_string "neon_mla_qqq_32_qqd_32_scalar")))))] ) -(define_insn "*mul<mode>3neg<mode>add<mode>_neon" +(define_insn "mul<mode>3neg<mode>add<mode>_neon" [(set (match_operand:VDQ 0 "s_register_operand" "=w") (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "0") (mult:VDQ (match_operand:VDQ 2 "s_register_operand" "w") (match_operand:VDQ 3 "s_register_operand" "w"))))] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") (if_then_else (ne (symbol_ref "<Is_float_mode>") (const_int 0)) @@ -940,10 +964,9 @@ ) (define_insn "iordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0") - (match_operand:DI 2 "neon_logic_op2" "w,Dl")] - UNSPEC_VORR))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") + (match_operand:DI 2 "neon_logic_op2" "w,Dl,r,r")))] "TARGET_NEON" { switch (which_alternative) @@ -951,10 +974,13 @@ case 0: return "vorr\t%P0, %P1, %P2"; case 1: return neon_output_logic_immediate ("vorr", &operands[2], DImode, 0, VALID_NEON_QREG_MODE (DImode)); + case 2: return "#"; + case 3: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,8,8")] ) ;; The concrete forms of the Neon immediate-logic instructions are vbic and @@ -980,10 +1006,9 @@ ) (define_insn "anddi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w,w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w,0") - (match_operand:DI 2 "neon_inv_logic_op2" "w,DL")] - UNSPEC_VAND))] + [(set (match_operand:DI 0 "s_register_operand" "=w,w,?&r,?&r") + (and:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,r") + (match_operand:DI 2 "neon_inv_logic_op2" "w,DL,r,r")))] "TARGET_NEON" { switch (which_alternative) @@ -991,10 +1016,13 @@ case 0: return "vand\t%P0, %P1, %P2"; case 1: return neon_output_logic_immediate ("vand", &operands[2], DImode, 1, VALID_NEON_QREG_MODE (DImode)); + case 2: return "#"; + case 3: return "#"; default: gcc_unreachable (); } } - [(set_attr "neon_type" "neon_int_1")] + [(set_attr "neon_type" "neon_int_1,neon_int_1,*,*") + (set_attr "length" "*,*,8,8")] ) (define_insn "orn<mode>3_neon" @@ -1007,13 +1035,16 @@ ) (define_insn "orndi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:DI 2 "s_register_operand" "w")] - UNSPEC_VORN))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (ior:DI (match_operand:DI 1 "s_register_operand" "w,r,0") + (not:DI (match_operand:DI 2 "s_register_operand" "w,0,r"))))] "TARGET_NEON" - "vorn\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1")] + "@ + vorn\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] ) (define_insn "bic<mode>3_neon" @@ -1025,14 +1056,18 @@ [(set_attr "neon_type" "neon_int_1")] ) +;; Compare to *anddi_notdi_di. (define_insn "bicdi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:DI 2 "s_register_operand" "w")] - UNSPEC_VBIC))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?=&r,?&r") + (and:DI (not:DI (match_operand:DI 2 "s_register_operand" "w,r,0")) + (match_operand:DI 1 "s_register_operand" "w,0,r")))] "TARGET_NEON" - "vbic\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1")] + "@ + vbic\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] ) (define_insn "xor<mode>3" @@ -1045,13 +1080,16 @@ ) (define_insn "xordi3_neon" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:DI 2 "s_register_operand" "w")] - UNSPEC_VEOR))] + [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r") + (xor:DI (match_operand:DI 1 "s_register_operand" "%w,0,r") + (match_operand:DI 2 "s_register_operand" "w,r,r")))] "TARGET_NEON" - "veor\t%P0, %P1, %P2" - [(set_attr "neon_type" "neon_int_1")] + "@ + veor\t%P0, %P1, %P2 + # + #" + [(set_attr "neon_type" "neon_int_1,*,*") + (set_attr "length" "*,8,8")] ) (define_insn "one_cmpl<mode>2" @@ -1320,7 +1358,7 @@ (parallel [(const_int 0) (const_int 1)])) (vec_select:V2SF (match_dup 1) (parallel [(const_int 2) (const_int 3)]))))] - "TARGET_NEON" + "TARGET_NEON && flag_unsafe_math_optimizations" "<VQH_mnem>.f32\t%P0, %e1, %f1" [(set_attr "vqh_mnem" "<VQH_mnem>") (set (attr "neon_type") @@ -1455,7 +1493,7 @@ (define_expand "reduc_splus_<mode>" [(match_operand:VD 0 "s_register_operand" "") (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, &gen_neon_vpadd_internal<mode>); @@ -1465,7 +1503,7 @@ (define_expand "reduc_splus_<mode>" [(match_operand:VQ 0 "s_register_operand" "") (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { rtx step1 = gen_reg_rtx (<V_HALF>mode); rtx res_d = gen_reg_rtx (<V_HALF>mode); @@ -1500,7 +1538,7 @@ (define_expand "reduc_smin_<mode>" [(match_operand:VD 0 "s_register_operand" "") (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, &gen_neon_vpsmin<mode>); @@ -1510,7 +1548,7 @@ (define_expand "reduc_smin_<mode>" [(match_operand:VQ 0 "s_register_operand" "") (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { rtx step1 = gen_reg_rtx (<V_HALF>mode); rtx res_d = gen_reg_rtx (<V_HALF>mode); @@ -1525,7 +1563,7 @@ (define_expand "reduc_smax_<mode>" [(match_operand:VD 0 "s_register_operand" "") (match_operand:VD 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { neon_pairwise_reduce (operands[0], operands[1], <MODE>mode, &gen_neon_vpsmax<mode>); @@ -1535,7 +1573,7 @@ (define_expand "reduc_smax_<mode>" [(match_operand:VQ 0 "s_register_operand" "") (match_operand:VQ 1 "s_register_operand" "")] - "TARGET_NEON" + "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" { rtx step1 = gen_reg_rtx (<V_HALF>mode); rtx res_d = gen_reg_rtx (<V_HALF>mode); @@ -1711,11 +1749,37 @@ ; good for plain vadd, vaddq. -(define_insn "neon_vadd<mode>" +(define_expand "neon_vadd<mode>" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Note that NEON operations don't support the full IEEE 754 standard: in +; particular, denormal values are flushed to zero. This means that GCC cannot +; use those instructions for autovectorization, etc. unless +; -funsafe-math-optimizations is in effect (in which case flush-to-zero +; behaviour is permissible). Intrinsic operations (provided by the arm_neon.h +; header) must work in either case: if -funsafe-math-optimizations is given, +; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics +; expand to unspecs (which may potentially limit the extent to which they might +; be optimized by generic code). + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vadd<mode>_unspec" [(set (match_operand:VDQX 0 "s_register_operand" "=w") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] + (match_operand:VDQX 2 "s_register_operand" "w")] UNSPEC_VADD))] "TARGET_NEON" "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" @@ -1788,6 +1852,8 @@ [(set_attr "neon_type" "neon_int_4")] ) +;; We cannot replace this unspec with mul<mode>3 because of the odd +;; polynomial multiplication case that can specified by operand 3. (define_insn "neon_vmul<mode>" [(set (match_operand:VDQW 0 "s_register_operand" "=w") (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") @@ -1811,13 +1877,31 @@ (const_string "neon_mul_qqq_8_16_32_ddd_32")))))] ) -(define_insn "neon_vmla<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:VDQW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VMLA))] +(define_expand "neon_vmla<mode>" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1], + operands[2], operands[3])); + else + emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vmla<mode>_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLA))] "TARGET_NEON" "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -1850,13 +1934,31 @@ (const_string "neon_mla_ddd_32_qqd_16_ddd_32_scalar_qdd_64_32_long_scalar_qdd_64_32_long")))] ) -(define_insn "neon_vmls<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0") - (match_operand:VDQW 2 "s_register_operand" "w") - (match_operand:VDQW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VMLS))] +(define_expand "neon_vmls<mode>" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:VDQW 1 "s_register_operand" "0") + (match_operand:VDQW 2 "s_register_operand" "w") + (match_operand:VDQW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0], + operands[1], operands[2], operands[3])); + else + emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1], + operands[2], operands[3])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vmls<mode>_unspec" + [(set (match_operand:VDQ 0 "s_register_operand" "=w") + (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "0") + (match_operand:VDQ 2 "s_register_operand" "w") + (match_operand:VDQ 3 "s_register_operand" "w")] + UNSPEC_VMLS))] "TARGET_NEON" "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -1966,11 +2068,27 @@ (const_string "neon_mul_qdd_64_32_long_qqd_16_ddd_32_scalar_64_32_long_scalar")))] ) -(define_insn "neon_vsub<mode>" +(define_expand "neon_vsub<mode>" + [(match_operand:VDQX 0 "s_register_operand" "=w") + (match_operand:VDQX 1 "s_register_operand" "w") + (match_operand:VDQX 2 "s_register_operand" "w") + (match_operand:SI 3 "immediate_operand" "i")] + "TARGET_NEON" +{ + if (!<Is_float_mode> || flag_unsafe_math_optimizations) + emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2])); + else + emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1], + operands[2])); + DONE; +}) + +; Used for intrinsics when flag_unsafe_math_optimizations is false. + +(define_insn "neon_vsub<mode>_unspec" [(set (match_operand:VDQX 0 "s_register_operand" "=w") (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w") - (match_operand:VDQX 2 "s_register_operand" "w") - (match_operand:SI 3 "immediate_operand" "i")] + (match_operand:VDQX 2 "s_register_operand" "w")] UNSPEC_VSUB))] "TARGET_NEON" "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2" @@ -2153,11 +2271,11 @@ (define_insn "neon_vaba<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "0") - (match_operand:VDQIW 2 "s_register_operand" "w") - (match_operand:VDQIW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VABA))] + (plus:VDQIW (match_operand:VDQIW 1 "s_register_operand" "0") + (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w") + (match_operand:VDQIW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABD)))] "TARGET_NEON" "vaba.%T4%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3" [(set (attr "neon_type") @@ -2167,11 +2285,11 @@ (define_insn "neon_vabal<mode>" [(set (match_operand:<V_widen> 0 "s_register_operand" "=w") - (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0") - (match_operand:VW 2 "s_register_operand" "w") - (match_operand:VW 3 "s_register_operand" "w") - (match_operand:SI 4 "immediate_operand" "i")] - UNSPEC_VABAL))] + (plus:<V_widen> (match_operand:<V_widen> 1 "s_register_operand" "0") + (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w") + (match_operand:VW 3 "s_register_operand" "w") + (match_operand:SI 4 "immediate_operand" "i")] + UNSPEC_VABDL)))] "TARGET_NEON" "vabal.%T4%#<V_sz_elem>\t%q0, %P2, %P3" [(set_attr "neon_type" "neon_vaba")] @@ -2302,22 +2420,15 @@ (const_string "neon_fp_vrecps_vrsqrts_qqq")))] ) -(define_insn "neon_vabs<mode>" - [(set (match_operand:VDQW 0 "s_register_operand" "=w") - (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VABS))] +(define_expand "neon_vabs<mode>" + [(match_operand:VDQW 0 "s_register_operand" "") + (match_operand:VDQW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] "TARGET_NEON" - "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1" - [(set (attr "neon_type") - (if_then_else (ior (ne (symbol_ref "<Is_float_mode>") (const_int 0)) - (ne (symbol_ref "<Is_float_mode>") (const_int 0))) - (if_then_else - (ne (symbol_ref "<Is_d_reg>") (const_int 0)) - (const_string "neon_fp_vadd_ddd_vabs_dd") - (const_string "neon_fp_vadd_qqq_vabs_qq")) - (const_string "neon_vqneg_vqabs")))] -) +{ + emit_insn (gen_abs<mode>2 (operands[0], operands[1])); + DONE; +}) (define_insn "neon_vqabs<mode>" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") @@ -2359,26 +2470,42 @@ [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "neon_vclz<mode>" +(define_insn "clz<mode>2" [(set (match_operand:VDQIW 0 "s_register_operand" "=w") - (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VCLZ))] + (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))] "TARGET_NEON" "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1" [(set_attr "neon_type" "neon_int_1")] ) -(define_insn "neon_vcnt<mode>" +(define_expand "neon_vclz<mode>" + [(match_operand:VDQIW 0 "s_register_operand" "") + (match_operand:VDQIW 1 "s_register_operand" "") + (match_operand:SI 2 "immediate_operand" "")] + "TARGET_NEON" +{ + emit_insn (gen_clz<mode>2 (operands[0], operands[1])); + DONE; +}) + +(define_insn "popcount<mode>2" [(set (match_operand:VE 0 "s_register_operand" "=w") - (unspec:VE [(match_operand:VE 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VCNT))] + (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))] "TARGET_NEON" "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1" [(set_attr "neon_type" "neon_int_1")] ) +(define_expand "neon_vcnt<mode>" + [(match_operand:VE 0 "s_register_operand" "=w") + (match_operand:VE 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] + "TARGET_NEON" +{ + emit_insn (gen_popcount<mode>2 (operands[0], operands[1])); + DONE; +}) + (define_insn "neon_vrecpe<mode>" [(set (match_operand:V32 0 "s_register_operand" "=w") (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w") @@ -2555,126 +2682,65 @@ ; Operand 3 (info word) is ignored because it does nothing useful with 64-bit ; elements. -(define_insn "neon_vget_lanedi" - [(set (match_operand:DI 0 "s_register_operand" "=r") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_expand "neon_vget_lanedi" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { neon_lane_bounds (operands[2], 0, 1); - return "vmov%?\t%Q0, %R0, %P1 @ di"; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) + emit_move_insn (operands[0], operands[1]); + DONE; +}) -(define_insn "neon_vget_lanev2di" - [(set (match_operand:DI 0 "s_register_operand" "=r") - (unspec:DI [(match_operand:V2DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VGET_LANE))] +(define_expand "neon_vget_lanev2di" + [(match_operand:DI 0 "s_register_operand" "=r") + (match_operand:V2DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i") + (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { - rtx ops[2]; - unsigned int regno = REGNO (operands[1]); - unsigned int elt = INTVAL (operands[2]); - neon_lane_bounds (operands[2], 0, 2); + emit_insn (gen_vec_extractv2di (operands[0], operands[1], operands[2])); + DONE; +}) - ops[0] = operands[0]; - ops[1] = gen_rtx_REG (DImode, regno + 2 * elt); - output_asm_insn ("vmov%?\t%Q0, %R0, %P1 @ v2di", ops); - - return ""; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) - -(define_insn "neon_vset_lane<mode>" - [(set (match_operand:VD 0 "s_register_operand" "=w") - (unspec:VD [(match_operand:<V_elem> 1 "s_register_operand" "r") - (match_operand:VD 2 "s_register_operand" "0") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VSET_LANE))] - "TARGET_NEON" -{ - neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); - return "vmov%?.<V_sz_elem>\t%P0[%c3], %1"; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) - -; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. - -(define_insn "neon_vset_lanedi" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "r") - (match_operand:DI 2 "s_register_operand" "0") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VSET_LANE))] - "TARGET_NEON" -{ - neon_lane_bounds (operands[3], 0, 1); - return "vmov%?\t%P0, %Q1, %R1 @ di"; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) - -(define_insn "neon_vset_lane<mode>" - [(set (match_operand:VQ 0 "s_register_operand" "=w") - (unspec:VQ [(match_operand:<V_elem> 1 "s_register_operand" "r") - (match_operand:VQ 2 "s_register_operand" "0") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VSET_LANE))] +(define_expand "neon_vset_lane<mode>" + [(match_operand:VDQ 0 "s_register_operand" "=w") + (match_operand:<V_elem> 1 "s_register_operand" "r") + (match_operand:VDQ 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { - rtx ops[4]; - unsigned int regno = REGNO (operands[0]); - unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2; unsigned int elt = INTVAL (operands[3]); + neon_lane_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode)); - neon_lane_bounds (operands[3], 0, halfelts * 2); + if (BYTES_BIG_ENDIAN) + { + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<MODE>mode)); + elt ^= reg_nelts - 1; + } - ops[0] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts)); - ops[1] = operands[1]; - ops[2] = GEN_INT (elt % halfelts); - output_asm_insn ("vmov%?.<V_sz_elem>\t%P0[%c2], %1", ops); + emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1], + GEN_INT (1 << elt), operands[2])); + DONE; +}) - return ""; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) +; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored. -(define_insn "neon_vset_lanev2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") - (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "r") - (match_operand:V2DI 2 "s_register_operand" "0") - (match_operand:SI 3 "immediate_operand" "i")] - UNSPEC_VSET_LANE))] +(define_expand "neon_vset_lanedi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r") + (match_operand:DI 2 "s_register_operand" "0") + (match_operand:SI 3 "immediate_operand" "i")] "TARGET_NEON" { - rtx ops[2]; - unsigned int regno = REGNO (operands[0]); - unsigned int elt = INTVAL (operands[3]); - - neon_lane_bounds (operands[3], 0, 2); - - ops[0] = gen_rtx_REG (DImode, regno + 2 * elt); - ops[1] = operands[1]; - output_asm_insn ("vmov%?\t%P0, %Q1, %R1 @ v2di", ops); - - return ""; -} - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] -) + neon_lane_bounds (operands[3], 0, 1); + emit_move_insn (operands[0], operands[1]); + DONE; +}) (define_expand "neon_vcreate<mode>" [(match_operand:VDX 0 "s_register_operand" "") @@ -2688,8 +2754,7 @@ (define_insn "neon_vdup_n<mode>" [(set (match_operand:VX 0 "s_register_operand" "=w") - (unspec:VX [(match_operand:<V_elem> 1 "s_register_operand" "r")] - UNSPEC_VDUP_N))] + (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))] "TARGET_NEON" "vdup%?.<V_sz_elem>\t%<V_reg>0, %1" ;; Assume this schedules like vmov. @@ -2699,8 +2764,7 @@ (define_insn "neon_vdup_n<mode>" [(set (match_operand:V32 0 "s_register_operand" "=w,w") - (unspec:V32 [(match_operand:<V_elem> 1 "s_register_operand" "r,t")] - UNSPEC_VDUP_N))] + (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))] "TARGET_NEON" "@ vdup%?.<V_sz_elem>\t%<V_reg>0, %1 @@ -2710,61 +2774,76 @@ (set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vdup_ndi" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "r")] - UNSPEC_VDUP_N))] +(define_expand "neon_vdup_ndi" + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "r")] "TARGET_NEON" - "vmov%?\t%P0, %Q1, %R1" - [(set_attr "predicable" "yes") - (set_attr "neon_type" "neon_bp_simple")] +{ + emit_move_insn (operands[0], operands[1]); + DONE; +} ) (define_insn "neon_vdup_nv2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") - (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "r")] - UNSPEC_VDUP_N))] + [(set (match_operand:V2DI 0 "s_register_operand" "=w,w") + (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))] "TARGET_NEON" - "vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1" + "@ + vmov%?\t%e0, %Q1, %R1\;vmov%?\t%f0, %Q1, %R1 + vmov%?\t%e0, %P1\;vmov%?\t%f0, %P1" [(set_attr "predicable" "yes") (set_attr "length" "8") (set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vdup_lane<mode>" - [(set (match_operand:VD 0 "s_register_operand" "=w") - (unspec:VD [(match_operand:VD 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VDUP_LANE))] +(define_insn "neon_vdup_lane<mode>_internal" + [(set (match_operand:VDQW 0 "s_register_operand" "=w") + (vec_duplicate:VDQW + (vec_select:<V_elem> + (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") + (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))] "TARGET_NEON" { - neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<MODE>mode)); - return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; + if (BYTES_BIG_ENDIAN) + { + int elt = INTVAL (operands[2]); + elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt; + operands[2] = GEN_INT (elt); + } + if (<Is_d_reg>) + return "vdup.<V_sz_elem>\t%P0, %P1[%c2]"; + else + return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; } ;; Assume this schedules like vmov. [(set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vdup_lane<mode>" - [(set (match_operand:VQ 0 "s_register_operand" "=w") - (unspec:VQ [(match_operand:<V_HALF> 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VDUP_LANE))] +(define_expand "neon_vdup_lane<mode>" + [(match_operand:VDQW 0 "s_register_operand" "=w") + (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] "TARGET_NEON" { - neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_HALF>mode)); - return "vdup.<V_sz_elem>\t%q0, %P1[%c2]"; -} - ;; Assume this schedules like vmov. - [(set_attr "neon_type" "neon_bp_simple")] -) + neon_lane_bounds (operands[2], 0, GET_MODE_NUNITS (<V_double_vector_mode>mode)); + if (BYTES_BIG_ENDIAN) + { + unsigned int elt = INTVAL (operands[2]); + unsigned int reg_nelts + = 64 / GET_MODE_BITSIZE (GET_MODE_INNER (<V_double_vector_mode>mode)); + elt ^= reg_nelts - 1; + operands[2] = GEN_INT (elt); + } + emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1], + operands[2])); + DONE; +}) ; Scalar index is ignored, since only zero is valid here. (define_expand "neon_vdup_lanedi" - [(set (match_operand:DI 0 "s_register_operand" "=w") - (unspec:DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VDUP_LANE))] + [(match_operand:DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] "TARGET_NEON" { neon_lane_bounds (operands[2], 0, 1); @@ -2772,20 +2851,17 @@ DONE; }) -; Likewise. -(define_insn "neon_vdup_lanev2di" - [(set (match_operand:V2DI 0 "s_register_operand" "=w") - (unspec:V2DI [(match_operand:DI 1 "s_register_operand" "w") - (match_operand:SI 2 "immediate_operand" "i")] - UNSPEC_VDUP_LANE))] +; Likewise for v2di, as the DImode second operand has only a single element. +(define_expand "neon_vdup_lanev2di" + [(match_operand:V2DI 0 "s_register_operand" "=w") + (match_operand:DI 1 "s_register_operand" "w") + (match_operand:SI 2 "immediate_operand" "i")] "TARGET_NEON" { neon_lane_bounds (operands[2], 0, 1); - return "vmov\t%e0, %P1\;vmov\t%f0, %P1"; -} - [(set_attr "length" "8") - (set_attr "neon_type" "neon_bp_simple")] -) + emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1])); + DONE; +}) ;; In this insn, operand 1 should be low, and operand 2 the high part of the ;; dest vector. @@ -2796,9 +2872,8 @@ (define_insn "neon_vcombine<mode>" [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w") - (unspec:<V_DOUBLE> [(match_operand:VDX 1 "s_register_operand" "w") - (match_operand:VDX 2 "s_register_operand" "w")] - UNSPEC_VCOMBINE))] + (vec_concat:<V_DOUBLE> (match_operand:VDX 1 "s_register_operand" "w") + (match_operand:VDX 2 "s_register_operand" "w")))] "TARGET_NEON" { int dest = REGNO (operands[0]); @@ -2838,10 +2913,31 @@ (set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vget_high<mode>" - [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w") - (unspec:<V_HALF> [(match_operand:VQX 1 "s_register_operand" "w")] - UNSPEC_VGET_HIGH))] +(define_insn "neon_vget_highv16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 8) (const_int 9) + (const_int 10) (const_int 11) + (const_int 12) (const_int 13) + (const_int 14) (const_int 15)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=w") + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] "TARGET_NEON" { int dest = REGNO (operands[0]); @@ -2855,10 +2951,133 @@ [(set_attr "neon_type" "neon_bp_simple")] ) -(define_insn "neon_vget_low<mode>" - [(set (match_operand:<V_HALF> 0 "s_register_operand" "=w") - (unspec:<V_HALF> [(match_operand:VQX 1 "s_register_operand" "w")] - UNSPEC_VGET_LOW))] +(define_insn "neon_vget_highv4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_highv2di" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") + (parallel [(const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src + 2) + return "vmov\t%P0, %f1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv16qi" + [(set (match_operand:V8QI 0 "s_register_operand" "=w") + (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3) + (const_int 4) (const_int 5) + (const_int 6) (const_int 7)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv8hi" + [(set (match_operand:V4HI 0 "s_register_operand" "=w") + (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1) + (const_int 2) (const_int 3)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv4si" + [(set (match_operand:V2SI 0 "s_register_operand" "=w") + (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv4sf" + [(set (match_operand:V2SF 0 "s_register_operand" "=w") + (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w") + (parallel [(const_int 0) (const_int 1)])))] + "TARGET_NEON" +{ + int dest = REGNO (operands[0]); + int src = REGNO (operands[1]); + + if (dest != src) + return "vmov\t%P0, %e1"; + else + return ""; +} + [(set_attr "neon_type" "neon_bp_simple")] +) + +(define_insn "neon_vget_lowv2di" + [(set (match_operand:DI 0 "s_register_operand" "=w") + (vec_select:DI (match_operand:V2DI 1 "s_register_operand" "w") + (parallel [(const_int 0)])))] "TARGET_NEON" { int dest = REGNO (operands[0]); |