diff options
Diffstat (limited to 'gcc')
-rw-r--r-- | gcc/config/arm/arm-protos.h | 2 | ||||
-rw-r--r-- | gcc/config/arm/arm.c | 211 | ||||
-rw-r--r-- | gcc/config/arm/iterators.md | 9 | ||||
-rw-r--r-- | gcc/config/arm/mve.md | 10 | ||||
-rw-r--r-- | gcc/config/arm/neon.md | 87 | ||||
-rw-r--r-- | gcc/config/arm/unspecs.md | 20 | ||||
-rw-r--r-- | gcc/config/arm/vec-common.md | 108 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c | 80 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c | 38 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c | 69 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c | 30 | ||||
-rw-r--r-- | gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c | 50 |
12 files changed, 548 insertions, 166 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h index 25215417751..ffccaa77377 100644 --- a/gcc/config/arm/arm-protos.h +++ b/gcc/config/arm/arm-protos.h @@ -373,7 +373,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx, extern bool arm_fusion_enabled_p (tune_params::fuse_ops); extern bool arm_valid_symbolic_address_p (rtx); extern bool arm_validize_comparison (rtx *, rtx *, rtx *); -extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool); +extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, bool); #endif /* RTX_CODE */ extern bool arm_gen_setmem (rtx *); diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c index d0c0c50be97..eee3671848f 100644 --- a/gcc/config/arm/arm.c +++ b/gcc/config/arm/arm.c @@ -30959,66 +30959,113 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem, and return true if TARGET contains the inverse. If !CAN_INVERT, always store the result in TARGET, never its inverse. + If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do + it with the right destination type to avoid emiting two vpsel, one here and + one in arm_expand_vcond. + Note that the handling of floating-point comparisons is not IEEE compliant. */ bool arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, - bool can_invert) + bool can_invert, bool vcond_mve) { machine_mode cmp_result_mode = GET_MODE (target); machine_mode cmp_mode = GET_MODE (op0); bool inverted; - switch (code) - { - /* For these we need to compute the inverse of the requested - comparison. */ - case UNORDERED: - case UNLT: - case UNLE: - case UNGT: - case UNGE: - case UNEQ: - case NE: - code = reverse_condition_maybe_unordered (code); - if (!can_invert) - { - /* Recursively emit the inverted comparison into a temporary - and then store its inverse in TARGET. This avoids reusing - TARGET (which for integer NE could be one of the inputs). */ - rtx tmp = gen_reg_rtx (cmp_result_mode); - if (arm_expand_vector_compare (tmp, code, op0, op1, true)) - gcc_unreachable (); - emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); - return false; - } - inverted = true; - break; - default: + /* MVE supports more comparisons than Neon. */ + if (TARGET_HAVE_MVE) inverted = false; - break; - } + else + switch (code) + { + /* For these we need to compute the inverse of the requested + comparison. */ + case UNORDERED: + case UNLT: + case UNLE: + case UNGT: + case UNGE: + case UNEQ: + case NE: + code = reverse_condition_maybe_unordered (code); + if (!can_invert) + { + /* Recursively emit the inverted comparison into a temporary + and then store its inverse in TARGET. This avoids reusing + TARGET (which for integer NE could be one of the inputs). */ + rtx tmp = gen_reg_rtx (cmp_result_mode); + if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve)) + gcc_unreachable (); + emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp))); + return false; + } + inverted = true; + break; + + default: + inverted = false; + break; + } switch (code) { - /* These are natively supported for zero comparisons, but otherwise - require the operands to be swapped. */ + /* These are natively supported by Neon for zero comparisons, but otherwise + require the operands to be swapped. For MVE, we can only compare + registers. */ case LE: case LT: - if (op1 != CONST0_RTX (cmp_mode)) - { - code = swap_condition (code); - std::swap (op0, op1); - } + if (!TARGET_HAVE_MVE) + if (op1 != CONST0_RTX (cmp_mode)) + { + code = swap_condition (code); + std::swap (op0, op1); + } /* Fall through. */ - /* These are natively supported for both register and zero operands. */ + /* These are natively supported by Neon for both register and zero + operands. MVE supports registers only. */ case EQ: case GE: case GT: - emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); + case NE: + if (TARGET_HAVE_MVE) + { + rtx vpr_p0; + if (vcond_mve) + vpr_p0 = target; + else + vpr_p0 = gen_reg_rtx (HImode); + + switch (GET_MODE_CLASS (cmp_mode)) + { + case MODE_VECTOR_INT: + emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); + break; + case MODE_VECTOR_FLOAT: + if (TARGET_HAVE_MVE_FLOAT) + emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); + else + gcc_unreachable (); + break; + default: + gcc_unreachable (); + } + + /* If we are not expanding a vcond, build the result here. */ + if (!vcond_mve) + { + rtx zero = gen_reg_rtx (cmp_result_mode); + rtx one = gen_reg_rtx (cmp_result_mode); + emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); + emit_move_insn (one, CONST1_RTX (cmp_result_mode)); + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); + } + } + else + emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1)); return inverted; /* These are natively supported for register operands only. @@ -31026,16 +31073,54 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, or canonicalized by target-independent code. */ case GEU: case GTU: - emit_insn (gen_neon_vc (code, cmp_mode, target, - op0, force_reg (cmp_mode, op1))); + if (TARGET_HAVE_MVE) + { + rtx vpr_p0; + if (vcond_mve) + vpr_p0 = target; + else + vpr_p0 = gen_reg_rtx (HImode); + + emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1))); + if (!vcond_mve) + { + rtx zero = gen_reg_rtx (cmp_result_mode); + rtx one = gen_reg_rtx (cmp_result_mode); + emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); + emit_move_insn (one, CONST1_RTX (cmp_result_mode)); + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); + } + } + else + emit_insn (gen_neon_vc (code, cmp_mode, target, + op0, force_reg (cmp_mode, op1))); return inverted; /* These require the operands to be swapped and likewise do not support comparisons with zero. */ case LEU: case LTU: - emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, - target, force_reg (cmp_mode, op1), op0)); + if (TARGET_HAVE_MVE) + { + rtx vpr_p0; + if (vcond_mve) + vpr_p0 = target; + else + vpr_p0 = gen_reg_rtx (HImode); + + emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0)); + if (!vcond_mve) + { + rtx zero = gen_reg_rtx (cmp_result_mode); + rtx one = gen_reg_rtx (cmp_result_mode); + emit_move_insn (zero, CONST0_RTX (cmp_result_mode)); + emit_move_insn (one, CONST1_RTX (cmp_result_mode)); + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0)); + } + } + else + emit_insn (gen_neon_vc (swap_condition (code), cmp_mode, + target, force_reg (cmp_mode, op1), op0)); return inverted; /* These need a combination of two comparisons. */ @@ -31047,8 +31132,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, rtx gt_res = gen_reg_rtx (cmp_result_mode); rtx alt_res = gen_reg_rtx (cmp_result_mode); rtx_code alt_code = (code == LTGT ? LT : LE); - if (arm_expand_vector_compare (gt_res, GT, op0, op1, true) - || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true)) + if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve) + || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve)) gcc_unreachable (); emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode, gt_res, alt_res))); @@ -31066,13 +31151,47 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1, void arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode) { - rtx mask = gen_reg_rtx (cmp_result_mode); + /* When expanding for MVE, we do not want to emit a (useless) vpsel in + arm_expand_vector_compare, and another one here. */ + bool vcond_mve=false; + rtx mask; + + if (TARGET_HAVE_MVE) + { + vcond_mve=true; + mask = gen_reg_rtx (HImode); + } + else + mask = gen_reg_rtx (cmp_result_mode); + bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]), - operands[4], operands[5], true); + operands[4], operands[5], true, vcond_mve); if (inverted) std::swap (operands[1], operands[2]); + if (TARGET_NEON) emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0], mask, operands[1], operands[2])); + else + { + machine_mode cmp_mode = GET_MODE (operands[4]); + rtx vpr_p0 = mask; + rtx zero = gen_reg_rtx (cmp_mode); + rtx one = gen_reg_rtx (cmp_mode); + emit_move_insn (zero, CONST0_RTX (cmp_mode)); + emit_move_insn (one, CONST1_RTX (cmp_mode)); + switch (GET_MODE_CLASS (cmp_mode)) + { + case MODE_VECTOR_INT: + emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, operands[0], one, zero, vpr_p0)); + break; + case MODE_VECTOR_FLOAT: + if (TARGET_HAVE_MVE_FLOAT) + emit_insn (gen_mve_vpselq_f (cmp_mode, operands[0], one, zero, vpr_p0)); + break; + default: + gcc_unreachable (); + } + } } #define MAX_VECT_LEN 16 diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md index 95df8bdf77d..a128465feea 100644 --- a/gcc/config/arm/iterators.md +++ b/gcc/config/arm/iterators.md @@ -1288,12 +1288,11 @@ (VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s") (VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u") (VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s") - (VADDLVQ_P_U "u") (VCMPNEQ_S "s") + (VADDLVQ_P_U "u") (VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s") (VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u") (VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s") - (VBRSRQ_N_U "u") (VCMPEQQ_S "s") - (VCMPEQQ_N_S "s") (VCMPNEQ_N_S "s") + (VBRSRQ_N_U "u") (VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s") (VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u") (VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u") @@ -1549,16 +1548,12 @@ (define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U]) (define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U]) (define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U]) -(define_int_iterator VCMPNEQ [VCMPNEQ_S]) (define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U]) (define_int_iterator VABDQ [VABDQ_S VABDQ_U]) (define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U]) (define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U]) (define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S]) (define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S]) -(define_int_iterator VCMPEQQ [VCMPEQQ_S]) -(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S]) -(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_S]) (define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U]) (define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S]) (define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U]) diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md index 45df2110ae9..133ebe93cf3 100644 --- a/gcc/config/arm/mve.md +++ b/gcc/config/arm/mve.md @@ -813,7 +813,7 @@ ;; ;; [vcmpneq_, vcmpcsq_, vcmpeqq_, vcmpgeq_, vcmpgtq_, vcmphiq_, vcmpleq_, vcmpltq_]) ;; -(define_insn "mve_vcmp<mve_cmp_op>q_<mode>" +(define_insn "@mve_vcmp<mve_cmp_op>q_<mode>" [ (set (match_operand:HI 0 "vpr_register_operand" "=Up") (MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w") @@ -1903,7 +1903,7 @@ ;; ;; [vcmpeqq_f, vcmpgeq_f, vcmpgtq_f, vcmpleq_f, vcmpltq_f, vcmpneq_f]) ;; -(define_insn "mve_vcmp<mve_cmp_op>q_f<mode>" +(define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>" [ (set (match_operand:HI 0 "vpr_register_operand" "=Up") (MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w") @@ -1917,7 +1917,7 @@ ;; ;; [vcmpeqq_n_f, vcmpgeq_n_f, vcmpgtq_n_f, vcmpleq_n_f, vcmpltq_n_f, vcmpneq_n_f]) ;; -(define_insn "mve_vcmp<mve_cmp_op>q_n_f<mode>" +(define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>" [ (set (match_operand:HI 0 "vpr_register_operand" "=Up") (MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w") @@ -3282,7 +3282,7 @@ ;; ;; [vpselq_u, vpselq_s]) ;; -(define_insn "mve_vpselq_<supf><mode>" +(define_insn "@mve_vpselq_<supf><mode>" [ (set (match_operand:MVE_1 0 "s_register_operand" "=w") (unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w") @@ -4377,7 +4377,7 @@ ;; ;; [vpselq_f]) ;; -(define_insn "mve_vpselq_f<mode>" +(define_insn "@mve_vpselq_f<mode>" [ (set (match_operand:MVE_0 0 "s_register_operand" "=w") (unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w") diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md index 641d26fed47..cc82d068a1c 100644 --- a/gcc/config/arm/neon.md +++ b/gcc/config/arm/neon.md @@ -1416,93 +1416,6 @@ [(set_attr "type" "neon_qsub<q>")] ) -(define_expand "vec_cmp<mode><v_cmp_result>" - [(set (match_operand:<V_cmp_result> 0 "s_register_operand") - (match_operator:<V_cmp_result> 1 "comparison_operator" - [(match_operand:VDQW 2 "s_register_operand") - (match_operand:VDQW 3 "reg_or_zero_operand")]))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -{ - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), - operands[2], operands[3], false); - DONE; -}) - -(define_expand "vec_cmpu<mode><mode>" - [(set (match_operand:VDQIW 0 "s_register_operand") - (match_operator:VDQIW 1 "comparison_operator" - [(match_operand:VDQIW 2 "s_register_operand") - (match_operand:VDQIW 3 "reg_or_zero_operand")]))] - "TARGET_NEON" -{ - arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), - operands[2], operands[3], false); - DONE; -}) - -;; Conditional instructions. These are comparisons with conditional moves for -;; vectors. They perform the assignment: -;; -;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2; -;; -;; where op3 is <, <=, ==, !=, >= or >. Operations are performed -;; element-wise. - -(define_expand "vcond<mode><mode>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW - (match_operator 3 "comparison_operator" - [(match_operand:VDQW 4 "s_register_operand") - (match_operand:VDQW 5 "reg_or_zero_operand")]) - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -{ - arm_expand_vcond (operands, <V_cmp_result>mode); - DONE; -}) - -(define_expand "vcond<V_cvtto><mode>" - [(set (match_operand:<V_CVTTO> 0 "s_register_operand") - (if_then_else:<V_CVTTO> - (match_operator 3 "comparison_operator" - [(match_operand:V32 4 "s_register_operand") - (match_operand:V32 5 "reg_or_zero_operand")]) - (match_operand:<V_CVTTO> 1 "s_register_operand") - (match_operand:<V_CVTTO> 2 "s_register_operand")))] - "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)" -{ - arm_expand_vcond (operands, <V_cmp_result>mode); - DONE; -}) - -(define_expand "vcondu<mode><v_cmp_result>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW - (match_operator 3 "arm_comparison_operator" - [(match_operand:<V_cmp_result> 4 "s_register_operand") - (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")]) - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] - "TARGET_NEON" -{ - arm_expand_vcond (operands, <V_cmp_result>mode); - DONE; -}) - -(define_expand "vcond_mask_<mode><v_cmp_result>" - [(set (match_operand:VDQW 0 "s_register_operand") - (if_then_else:VDQW - (match_operand:<V_cmp_result> 3 "s_register_operand") - (match_operand:VDQW 1 "s_register_operand") - (match_operand:VDQW 2 "s_register_operand")))] - "TARGET_NEON" -{ - emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1], - operands[2])); - DONE; -}) - ;; Patterns for builtins. ; good for plain vadd, vaddq. diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md index 07ca53b8b0b..0778db1bf4f 100644 --- a/gcc/config/arm/unspecs.md +++ b/gcc/config/arm/unspecs.md @@ -596,8 +596,6 @@ VCVTQ_N_FROM_F_U VADDLVQ_P_S VADDLVQ_P_U - VCMPNEQ_U - VCMPNEQ_S VSHLQ_S VSHLQ_U VABDQ_S @@ -605,9 +603,6 @@ VADDVAQ_S VADDVQ_P_S VBRSRQ_N_S - VCMPEQQ_S - VCMPEQQ_N_S - VCMPNEQ_N_S VHADDQ_S VHADDQ_N_S VHSUBQ_S @@ -645,9 +640,6 @@ VADDVAQ_U VADDVQ_P_U VBRSRQ_N_U - VCMPEQQ_U - VCMPEQQ_N_U - VCMPNEQ_N_U VHADDQ_U VHADDQ_N_U VHSUBQ_U @@ -680,14 +672,6 @@ VSHLQ_R_U VSUBQ_U VSUBQ_N_U - VCMPGEQ_N_S - VCMPGEQ_S - VCMPGTQ_N_S - VCMPGTQ_S - VCMPLEQ_N_S - VCMPLEQ_S - VCMPLTQ_N_S - VCMPLTQ_S VHCADDQ_ROT270_S VHCADDQ_ROT90_S VMAXAQ_S @@ -702,10 +686,6 @@ VQRDMULHQ_N_S VQRDMULHQ_S VQSHLUQ_N_S - VCMPCSQ_N_U - VCMPCSQ_U - VCMPHIQ_N_U - VCMPHIQ_U VABDQ_M_S VABDQ_M_U VABDQ_F diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md index 0b2b3b1c9ce..448731f7be9 100644 --- a/gcc/config/arm/vec-common.md +++ b/gcc/config/arm/vec-common.md @@ -362,3 +362,111 @@ DONE; } }) + +(define_expand "vec_cmp<mode><v_cmp_result>" + [(set (match_operand:<V_cmp_result> 0 "s_register_operand") + (match_operator:<V_cmp_result> 1 "comparison_operator" + [(match_operand:VDQW 2 "s_register_operand") + (match_operand:VDQW 3 "reg_or_zero_operand")]))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false, false); + DONE; +}) + +(define_expand "vec_cmpu<mode><mode>" + [(set (match_operand:VDQIW 0 "s_register_operand") + (match_operator:VDQIW 1 "comparison_operator" + [(match_operand:VDQIW 2 "s_register_operand") + (match_operand:VDQIW 3 "reg_or_zero_operand")]))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +{ + arm_expand_vector_compare (operands[0], GET_CODE (operands[1]), + operands[2], operands[3], false, false); + DONE; +}) + +;; Conditional instructions. These are comparisons with conditional moves for +;; vectors. They perform the assignment: +;; +;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2; +;; +;; where op3 is <, <=, ==, !=, >= or >. Operations are performed +;; element-wise. + +(define_expand "vcond<mode><mode>" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operator 3 "comparison_operator" + [(match_operand:VDQW 4 "s_register_operand") + (match_operand:VDQW 5 "reg_or_zero_operand")]) + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + +(define_expand "vcond<V_cvtto><mode>" + [(set (match_operand:<V_CVTTO> 0 "s_register_operand") + (if_then_else:<V_CVTTO> + (match_operator 3 "comparison_operator" + [(match_operand:V32 4 "s_register_operand") + (match_operand:V32 5 "reg_or_zero_operand")]) + (match_operand:<V_CVTTO> 1 "s_register_operand") + (match_operand:<V_CVTTO> 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + +(define_expand "vcondu<mode><v_cmp_result>" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operator 3 "arm_comparison_operator" + [(match_operand:<V_cmp_result> 4 "s_register_operand") + (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")]) + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT" +{ + arm_expand_vcond (operands, <V_cmp_result>mode); + DONE; +}) + +(define_expand "vcond_mask_<mode><v_cmp_result>" + [(set (match_operand:VDQW 0 "s_register_operand") + (if_then_else:VDQW + (match_operand:<V_cmp_result> 3 "s_register_operand") + (match_operand:VDQW 1 "s_register_operand") + (match_operand:VDQW 2 "s_register_operand")))] + "ARM_HAVE_<MODE>_ARITH + && !TARGET_REALLY_IWMMXT + && (!<Is_float_mode> || flag_unsafe_math_optimizations)" +{ + if (TARGET_NEON) + { + emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3], + operands[1], operands[2])); + } + else if (TARGET_HAVE_MVE) + { + emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0], + operands[1], operands[2], operands[3])); + } + else + gcc_unreachable (); + + DONE; +}) diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c new file mode 100644 index 00000000000..029c931f47f --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c @@ -0,0 +1,80 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +/* Integer tests. */ + +#define COMPARE_REG(NAME, OP, TYPE) \ + TYPE \ + cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \ + { \ + return a OP b; \ + } + +#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \ + COMPARE_REG (NAME, OP, TYPE) \ + \ + TYPE \ + cmp_##NAME##_##TYPE##_zero (TYPE a) \ + { \ + return a OP (TYPE) {}; \ + } + +#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \ + COMPARE_REG_AND_ZERO (eq, ==, TYPE) \ + COMPARE_REG_AND_ZERO (ne, !=, TYPE) \ + COMPARE_ORDERED (lt, <, TYPE) \ + COMPARE_ORDERED (le, <=, TYPE) \ + COMPARE_ORDERED (gt, >, TYPE) \ + COMPARE_ORDERED (ge, >=, TYPE) + +#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED, SIZE) \ + typedef ELEM NAME##SIZE __attribute__((vector_size(SIZE))); \ + COMPARE_TYPE (NAME##SIZE, COMPARE_ORDERED) + +/* 64-bits vectors, not vectorized. */ +TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 8) +TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 8) +TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 8) +TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 8) +TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 8) +TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 8) + +/* 128-bits vectors. */ +TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 16) +TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 16) +TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 16) +TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 16) +TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 16) +TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 16) + +/* { 8 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i8 eq, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i8 ne, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 lt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 le, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 gt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 ge, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u8 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u8 cs, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { 16 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i16 eq, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i16 ne, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 lt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 le, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 gt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 ge, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u16 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u16 cs, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { 32 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i32 eq, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i32 ne, q[0-9]+, q[0-9]+\n} 4 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 lt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 le, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 gt, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 ge, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u32 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u32 cs, q[0-9]+, q[0-9]+\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c new file mode 100644 index 00000000000..8515195ec87 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c @@ -0,0 +1,38 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +/* float 32 tests. */ + +#ifndef ELEM_TYPE +#define ELEM_TYPE float +#endif +#ifndef INT_ELEM_TYPE +#define INT_ELEM_TYPE __INT32_TYPE__ +#endif + +#define COMPARE(NAME, OP) \ + int_vec \ + cmp_##NAME##_reg (vec a, vec b) \ + { \ + return a OP b; \ + } + +typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16))); +typedef ELEM_TYPE vec __attribute__((vector_size(16))); + +COMPARE (eq, ==) +COMPARE (ne, !=) +COMPARE (lt, <) +COMPARE (le, <=) +COMPARE (gt, >) +COMPARE (ge, >=) + +/* eq, ne, lt, le, gt, ge. +/* { dg-final { scan-assembler-times {\tvcmp.f32\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tge, q[0-9]+, q[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c new file mode 100644 index 00000000000..77749723693 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c @@ -0,0 +1,69 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#define COMPARE_REG(NAME, OP, TYPE, SCALAR) \ + TYPE \ + cmp_##NAME##_##TYPE##_scalar (TYPE a, SCALAR b) \ + { \ + return a OP b; \ + } + +#define COMPARE_TYPE(SCALAR, TYPE) \ + COMPARE_REG (eq, ==, TYPE, SCALAR) \ + COMPARE_REG (ne, !=, TYPE, SCALAR) \ + COMPARE_REG (lt, <, TYPE, SCALAR) \ + COMPARE_REG (le, <=, TYPE, SCALAR) \ + COMPARE_REG (gt, >, TYPE, SCALAR) \ + COMPARE_REG (ge, >=, TYPE, SCALAR) + +#define TEST_TYPE(NAME, ELEM, SIZE) \ + typedef ELEM NAME##SIZE __attribute__((vector_size(SIZE))); \ + COMPARE_TYPE (ELEM, NAME##SIZE) + +/* 64-bits vectors, not vectorized. */ +TEST_TYPE (vs8, __INT8_TYPE__, 8) +TEST_TYPE (vu8, __UINT8_TYPE__, 8) +TEST_TYPE (vs16, __INT16_TYPE__, 8) +TEST_TYPE (vu16, __UINT16_TYPE__, 8) +TEST_TYPE (vs32, __INT32_TYPE__, 8) +TEST_TYPE (vu32, __UINT32_TYPE__, 8) + +/* 128-bits vectors. */ +TEST_TYPE (vs8, __INT8_TYPE__, 16) +TEST_TYPE (vu8, __UINT8_TYPE__, 16) +TEST_TYPE (vs16, __INT16_TYPE__, 16) +TEST_TYPE (vu16, __UINT16_TYPE__, 16) +TEST_TYPE (vs32, __INT32_TYPE__, 16) +TEST_TYPE (vu32, __UINT32_TYPE__, 16) + +/* { 8 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i8 eq, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i8 ne, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 lt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 le, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 gt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s8 ge, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u8 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u8 cs, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { 16 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i16 eq, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i16 ne, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 lt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 le, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 gt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s16 ge, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u16 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u16 cs, q[0-9]+, q[0-9]+\n} 2 } } */ + +/* { 32 bits } x { eq, ne, lt, le, gt, ge, hi, cs }. +/* { dg-final { scan-assembler-times {\tvcmp.i32 eq, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i32 ne, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 lt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 le, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 gt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s32 ge, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u32 hi, q[0-9]+, q[0-9]+\n} 2 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u32 cs, q[0-9]+, q[0-9]+\n} 2 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c new file mode 100644 index 00000000000..4ed449e7d52 --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c @@ -0,0 +1,30 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */ +/* { dg-add-options arm_v8_1m_mve_fp } */ +/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */ + +#include <stdint.h> + +#define NB 4 + +#define FUNC(OP, NAME) \ + void test_ ## NAME ##_f (float * __restrict__ dest, float *a, float *b) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = a[i] OP b[i]; \ + } \ + } + +FUNC(==, vcmpeq) +FUNC(!=, vcmpne) +FUNC(<, vcmplt) +FUNC(<=, vcmple) +FUNC(>, vcmpgt) +FUNC(>=, vcmpge) + +/* { dg-final { scan-assembler-times {\tvcmp.f32\teq, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tne, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tlt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tle, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tgt, q[0-9]+, q[0-9]+\n} 1 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.f32\tge, q[0-9]+, q[0-9]+\n} 1 } } */ diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c new file mode 100644 index 00000000000..8da15e762eb --- /dev/null +++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c @@ -0,0 +1,50 @@ +/* { dg-do assemble } */ +/* { dg-require-effective-target arm_v8_1m_mve_ok } */ +/* { dg-add-options arm_v8_1m_mve } */ +/* { dg-additional-options "-O3" } */ + +#include <stdint.h> + +#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME) \ + void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \ + int i; \ + for (i=0; i<NB; i++) { \ + dest[i] = a[i] OP b[i]; \ + } \ +} + +#define ALL_FUNCS(OP, NAME) \ + FUNC(s, int, 32, 2, OP, NAME) \ + FUNC(u, uint, 32, 2, OP, NAME) \ + FUNC(s, int, 16, 4, OP, NAME) \ + FUNC(u, uint, 16, 4, OP, NAME) \ + FUNC(s, int, 8, 8, OP, NAME) \ + FUNC(u, uint, 8, 8, OP, NAME) \ + FUNC(s, int, 32, 4, OP, NAME) \ + FUNC(u, uint, 32, 4, OP, NAME) \ + FUNC(s, int, 16, 8, OP, NAME) \ + FUNC(u, uint, 16, 8, OP, NAME) \ + FUNC(s, int, 8, 16, OP, NAME) \ + FUNC(u, uint, 8, 16, OP, NAME) + +ALL_FUNCS(==, vcmpeq) +ALL_FUNCS(!=, vcmpne) +ALL_FUNCS(<, vcmplt) +ALL_FUNCS(<=, vcmple) +ALL_FUNCS(>, vcmpgt) +ALL_FUNCS(>=, vcmpge) + +/* MVE has only 128-bit vectors, so we can vectorize only half of the + functions above. */ +/* { dg-final { scan-assembler-times {\tvcmp.i[0-9]+ eq, q[0-9]+, q[0-9]+\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.i[0-9]+ ne, q[0-9]+, q[0-9]+\n} 6 } } */ + +/* lt, le, gt, ge apply to signed types, cs and hi to unsigned types. */ +/* lt and le with unsigned types are replaced with the opposite condition, hence + the double number of matches for cs and hi. */ +/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ lt, q[0-9]+, q[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ le, q[0-9]+, q[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ gt, q[0-9]+, q[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ ge, q[0-9]+, q[0-9]+\n} 3 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u[0-9]+ cs, q[0-9]+, q[0-9]+\n} 6 } } */ +/* { dg-final { scan-assembler-times {\tvcmp.u[0-9]+ hi, q[0-9]+, q[0-9]+\n} 6 } } */ |