summaryrefslogtreecommitdiff
path: root/gcc
diff options
context:
space:
mode:
Diffstat (limited to 'gcc')
-rw-r--r--gcc/config/arm/arm-protos.h2
-rw-r--r--gcc/config/arm/arm.c211
-rw-r--r--gcc/config/arm/iterators.md9
-rw-r--r--gcc/config/arm/mve.md10
-rw-r--r--gcc/config/arm/neon.md87
-rw-r--r--gcc/config/arm/unspecs.md20
-rw-r--r--gcc/config/arm/vec-common.md108
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c80
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c38
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c69
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c30
-rw-r--r--gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c50
12 files changed, 548 insertions, 166 deletions
diff --git a/gcc/config/arm/arm-protos.h b/gcc/config/arm/arm-protos.h
index 25215417751..ffccaa77377 100644
--- a/gcc/config/arm/arm-protos.h
+++ b/gcc/config/arm/arm-protos.h
@@ -373,7 +373,7 @@ extern void arm_emit_coreregs_64bit_shift (enum rtx_code, rtx, rtx, rtx, rtx,
extern bool arm_fusion_enabled_p (tune_params::fuse_ops);
extern bool arm_valid_symbolic_address_p (rtx);
extern bool arm_validize_comparison (rtx *, rtx *, rtx *);
-extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool);
+extern bool arm_expand_vector_compare (rtx, rtx_code, rtx, rtx, bool, bool);
#endif /* RTX_CODE */
extern bool arm_gen_setmem (rtx *);
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index d0c0c50be97..eee3671848f 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -30959,66 +30959,113 @@ arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
and return true if TARGET contains the inverse. If !CAN_INVERT,
always store the result in TARGET, never its inverse.
+ If VCOND_MVE, do not emit the vpsel instruction here, let arm_expand_vcond do
+ it with the right destination type to avoid emitting two vpsel, one here and
+ one in arm_expand_vcond.
+
Note that the handling of floating-point comparisons is not
IEEE compliant. */
bool
arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
- bool can_invert)
+ bool can_invert, bool vcond_mve)
{
machine_mode cmp_result_mode = GET_MODE (target);
machine_mode cmp_mode = GET_MODE (op0);
bool inverted;
- switch (code)
- {
- /* For these we need to compute the inverse of the requested
- comparison. */
- case UNORDERED:
- case UNLT:
- case UNLE:
- case UNGT:
- case UNGE:
- case UNEQ:
- case NE:
- code = reverse_condition_maybe_unordered (code);
- if (!can_invert)
- {
- /* Recursively emit the inverted comparison into a temporary
- and then store its inverse in TARGET. This avoids reusing
- TARGET (which for integer NE could be one of the inputs). */
- rtx tmp = gen_reg_rtx (cmp_result_mode);
- if (arm_expand_vector_compare (tmp, code, op0, op1, true))
- gcc_unreachable ();
- emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
- return false;
- }
- inverted = true;
- break;
- default:
+ /* MVE supports more comparisons than Neon. */
+ if (TARGET_HAVE_MVE)
inverted = false;
- break;
- }
+ else
+ switch (code)
+ {
+ /* For these we need to compute the inverse of the requested
+ comparison. */
+ case UNORDERED:
+ case UNLT:
+ case UNLE:
+ case UNGT:
+ case UNGE:
+ case UNEQ:
+ case NE:
+ code = reverse_condition_maybe_unordered (code);
+ if (!can_invert)
+ {
+ /* Recursively emit the inverted comparison into a temporary
+ and then store its inverse in TARGET. This avoids reusing
+ TARGET (which for integer NE could be one of the inputs). */
+ rtx tmp = gen_reg_rtx (cmp_result_mode);
+ if (arm_expand_vector_compare (tmp, code, op0, op1, true, vcond_mve))
+ gcc_unreachable ();
+ emit_insn (gen_rtx_SET (target, gen_rtx_NOT (cmp_result_mode, tmp)));
+ return false;
+ }
+ inverted = true;
+ break;
+
+ default:
+ inverted = false;
+ break;
+ }
switch (code)
{
- /* These are natively supported for zero comparisons, but otherwise
- require the operands to be swapped. */
+ /* These are natively supported by Neon for zero comparisons, but otherwise
+ require the operands to be swapped. For MVE, we can only compare
+ registers. */
case LE:
case LT:
- if (op1 != CONST0_RTX (cmp_mode))
- {
- code = swap_condition (code);
- std::swap (op0, op1);
- }
+ if (!TARGET_HAVE_MVE)
+ if (op1 != CONST0_RTX (cmp_mode))
+ {
+ code = swap_condition (code);
+ std::swap (op0, op1);
+ }
/* Fall through. */
- /* These are natively supported for both register and zero operands. */
+ /* These are natively supported by Neon for both register and zero
+ operands. MVE supports registers only. */
case EQ:
case GE:
case GT:
- emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
+ case NE:
+ if (TARGET_HAVE_MVE)
+ {
+ rtx vpr_p0;
+ if (vcond_mve)
+ vpr_p0 = target;
+ else
+ vpr_p0 = gen_reg_rtx (HImode);
+
+ switch (GET_MODE_CLASS (cmp_mode))
+ {
+ case MODE_VECTOR_INT:
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ break;
+ case MODE_VECTOR_FLOAT:
+ if (TARGET_HAVE_MVE_FLOAT)
+ emit_insn (gen_mve_vcmpq_f (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ else
+ gcc_unreachable ();
+ break;
+ default:
+ gcc_unreachable ();
+ }
+
+ /* If we are not expanding a vcond, build the result here. */
+ if (!vcond_mve)
+ {
+ rtx zero = gen_reg_rtx (cmp_result_mode);
+ rtx one = gen_reg_rtx (cmp_result_mode);
+ emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
+ emit_move_insn (one, CONST1_RTX (cmp_result_mode));
+ emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+ }
+ }
+ else
+ emit_insn (gen_neon_vc (code, cmp_mode, target, op0, op1));
return inverted;
/* These are natively supported for register operands only.
@@ -31026,16 +31073,54 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
or canonicalized by target-independent code. */
case GEU:
case GTU:
- emit_insn (gen_neon_vc (code, cmp_mode, target,
- op0, force_reg (cmp_mode, op1)));
+ if (TARGET_HAVE_MVE)
+ {
+ rtx vpr_p0;
+ if (vcond_mve)
+ vpr_p0 = target;
+ else
+ vpr_p0 = gen_reg_rtx (HImode);
+
+ emit_insn (gen_mve_vcmpq (code, cmp_mode, vpr_p0, op0, force_reg (cmp_mode, op1)));
+ if (!vcond_mve)
+ {
+ rtx zero = gen_reg_rtx (cmp_result_mode);
+ rtx one = gen_reg_rtx (cmp_result_mode);
+ emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
+ emit_move_insn (one, CONST1_RTX (cmp_result_mode));
+ emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+ }
+ }
+ else
+ emit_insn (gen_neon_vc (code, cmp_mode, target,
+ op0, force_reg (cmp_mode, op1)));
return inverted;
/* These require the operands to be swapped and likewise do not
support comparisons with zero. */
case LEU:
case LTU:
- emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
- target, force_reg (cmp_mode, op1), op0));
+ if (TARGET_HAVE_MVE)
+ {
+ rtx vpr_p0;
+ if (vcond_mve)
+ vpr_p0 = target;
+ else
+ vpr_p0 = gen_reg_rtx (HImode);
+
+ emit_insn (gen_mve_vcmpq (swap_condition (code), cmp_mode, vpr_p0, force_reg (cmp_mode, op1), op0));
+ if (!vcond_mve)
+ {
+ rtx zero = gen_reg_rtx (cmp_result_mode);
+ rtx one = gen_reg_rtx (cmp_result_mode);
+ emit_move_insn (zero, CONST0_RTX (cmp_result_mode));
+ emit_move_insn (one, CONST1_RTX (cmp_result_mode));
+ emit_insn (gen_mve_vpselq (VPSELQ_S, cmp_result_mode, target, one, zero, vpr_p0));
+ }
+ }
+ else
+ emit_insn (gen_neon_vc (swap_condition (code), cmp_mode,
+ target, force_reg (cmp_mode, op1), op0));
return inverted;
/* These need a combination of two comparisons. */
@@ -31047,8 +31132,8 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
rtx gt_res = gen_reg_rtx (cmp_result_mode);
rtx alt_res = gen_reg_rtx (cmp_result_mode);
rtx_code alt_code = (code == LTGT ? LT : LE);
- if (arm_expand_vector_compare (gt_res, GT, op0, op1, true)
- || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true))
+ if (arm_expand_vector_compare (gt_res, GT, op0, op1, true, vcond_mve)
+ || arm_expand_vector_compare (alt_res, alt_code, op0, op1, true, vcond_mve))
gcc_unreachable ();
emit_insn (gen_rtx_SET (target, gen_rtx_IOR (cmp_result_mode,
gt_res, alt_res)));
@@ -31066,13 +31151,47 @@ arm_expand_vector_compare (rtx target, rtx_code code, rtx op0, rtx op1,
void
arm_expand_vcond (rtx *operands, machine_mode cmp_result_mode)
{
- rtx mask = gen_reg_rtx (cmp_result_mode);
+ /* For MVE, have arm_expand_vector_compare leave its result as an HImode
+ predicate in MASK, so that the only vpsel emitted is the one below. */
+ bool vcond_mve = false;
+ rtx mask;
+
+ if (TARGET_HAVE_MVE)
+ {
+ vcond_mve = true;
+ mask = gen_reg_rtx (HImode);
+ }
+ else
+ mask = gen_reg_rtx (cmp_result_mode);
+
bool inverted = arm_expand_vector_compare (mask, GET_CODE (operands[3]),
- operands[4], operands[5], true);
+ operands[4], operands[5], true, vcond_mve);
if (inverted)
std::swap (operands[1], operands[2]);
+ if (TARGET_NEON)
emit_insn (gen_neon_vbsl (GET_MODE (operands[0]), operands[0],
mask, operands[1], operands[2]));
+ else
+ {
+ /* Select between the caller's values: lanes where MASK is set take
+ operands[1], the others operands[2]. The vpsel variant depends on the
+ mode of the data being selected, i.e. that of operands[0]. */
+ machine_mode data_mode = GET_MODE (operands[0]);
+ switch (GET_MODE_CLASS (data_mode))
+ {
+ case MODE_VECTOR_INT:
+ emit_insn (gen_mve_vpselq (VPSELQ_S, data_mode, operands[0], operands[1], operands[2], mask));
+ break;
+ case MODE_VECTOR_FLOAT:
+ if (TARGET_HAVE_MVE_FLOAT)
+ emit_insn (gen_mve_vpselq_f (data_mode, operands[0], operands[1], operands[2], mask));
+ else
+ gcc_unreachable ();
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ }
}
#define MAX_VECT_LEN 16
diff --git a/gcc/config/arm/iterators.md b/gcc/config/arm/iterators.md
index 95df8bdf77d..a128465feea 100644
--- a/gcc/config/arm/iterators.md
+++ b/gcc/config/arm/iterators.md
@@ -1288,12 +1288,11 @@
(VCREATEQ_U "u") (VCREATEQ_S "s") (VSHRQ_N_S "s")
(VSHRQ_N_U "u") (VCVTQ_N_FROM_F_S "s") (VSHLQ_U "u")
(VCVTQ_N_FROM_F_U "u") (VADDLVQ_P_S "s") (VSHLQ_S "s")
- (VADDLVQ_P_U "u") (VCMPNEQ_S "s")
+ (VADDLVQ_P_U "u")
(VABDQ_M_S "s") (VABDQ_M_U "u") (VABDQ_S "s")
(VABDQ_U "u") (VADDQ_N_S "s") (VADDQ_N_U "u")
(VADDVQ_P_S "s") (VADDVQ_P_U "u") (VBRSRQ_N_S "s")
- (VBRSRQ_N_U "u") (VCMPEQQ_S "s")
- (VCMPEQQ_N_S "s") (VCMPNEQ_N_S "s")
+ (VBRSRQ_N_U "u")
(VHADDQ_N_S "s") (VHADDQ_N_U "u") (VHADDQ_S "s")
(VHADDQ_U "u") (VHSUBQ_N_S "s") (VHSUBQ_N_U "u")
(VHSUBQ_S "s") (VMAXQ_S "s") (VMAXQ_U "u") (VHSUBQ_U "u")
@@ -1549,16 +1548,12 @@
(define_int_iterator VSHRQ_N [VSHRQ_N_S VSHRQ_N_U])
(define_int_iterator VCVTQ_N_FROM_F [VCVTQ_N_FROM_F_S VCVTQ_N_FROM_F_U])
(define_int_iterator VADDLVQ_P [VADDLVQ_P_S VADDLVQ_P_U])
-(define_int_iterator VCMPNEQ [VCMPNEQ_S])
(define_int_iterator VSHLQ [VSHLQ_S VSHLQ_U])
(define_int_iterator VABDQ [VABDQ_S VABDQ_U])
(define_int_iterator VADDQ_N [VADDQ_N_S VADDQ_N_U])
(define_int_iterator VADDVAQ [VADDVAQ_S VADDVAQ_U])
(define_int_iterator VADDVQ_P [VADDVQ_P_U VADDVQ_P_S])
(define_int_iterator VBRSRQ_N [VBRSRQ_N_U VBRSRQ_N_S])
-(define_int_iterator VCMPEQQ [VCMPEQQ_S])
-(define_int_iterator VCMPEQQ_N [VCMPEQQ_N_S])
-(define_int_iterator VCMPNEQ_N [VCMPNEQ_N_S])
(define_int_iterator VHADDQ [VHADDQ_S VHADDQ_U])
(define_int_iterator VHADDQ_N [VHADDQ_N_U VHADDQ_N_S])
(define_int_iterator VHSUBQ [VHSUBQ_S VHSUBQ_U])
diff --git a/gcc/config/arm/mve.md b/gcc/config/arm/mve.md
index 45df2110ae9..133ebe93cf3 100644
--- a/gcc/config/arm/mve.md
+++ b/gcc/config/arm/mve.md
@@ -813,7 +813,7 @@
;;
;; [vcmpneq_, vcmpcsq_, vcmpeqq_, vcmpgeq_, vcmpgtq_, vcmphiq_, vcmpleq_, vcmpltq_])
;;
-(define_insn "mve_vcmp<mve_cmp_op>q_<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op>q_<mode>"
[
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
(MVE_COMPARISONS:HI (match_operand:MVE_2 1 "s_register_operand" "w")
@@ -1903,7 +1903,7 @@
;;
;; [vcmpeqq_f, vcmpgeq_f, vcmpgtq_f, vcmpleq_f, vcmpltq_f, vcmpneq_f])
;;
-(define_insn "mve_vcmp<mve_cmp_op>q_f<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op>q_f<mode>"
[
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
(MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w")
@@ -1917,7 +1917,7 @@
;;
;; [vcmpeqq_n_f, vcmpgeq_n_f, vcmpgtq_n_f, vcmpleq_n_f, vcmpltq_n_f, vcmpneq_n_f])
;;
-(define_insn "mve_vcmp<mve_cmp_op>q_n_f<mode>"
+(define_insn "@mve_vcmp<mve_cmp_op>q_n_f<mode>"
[
(set (match_operand:HI 0 "vpr_register_operand" "=Up")
(MVE_FP_COMPARISONS:HI (match_operand:MVE_0 1 "s_register_operand" "w")
@@ -3282,7 +3282,7 @@
;;
;; [vpselq_u, vpselq_s])
;;
-(define_insn "mve_vpselq_<supf><mode>"
+(define_insn "@mve_vpselq_<supf><mode>"
[
(set (match_operand:MVE_1 0 "s_register_operand" "=w")
(unspec:MVE_1 [(match_operand:MVE_1 1 "s_register_operand" "w")
@@ -4377,7 +4377,7 @@
;;
;; [vpselq_f])
;;
-(define_insn "mve_vpselq_f<mode>"
+(define_insn "@mve_vpselq_f<mode>"
[
(set (match_operand:MVE_0 0 "s_register_operand" "=w")
(unspec:MVE_0 [(match_operand:MVE_0 1 "s_register_operand" "w")
diff --git a/gcc/config/arm/neon.md b/gcc/config/arm/neon.md
index 641d26fed47..cc82d068a1c 100644
--- a/gcc/config/arm/neon.md
+++ b/gcc/config/arm/neon.md
@@ -1416,93 +1416,6 @@
[(set_attr "type" "neon_qsub<q>")]
)
-(define_expand "vec_cmp<mode><v_cmp_result>"
- [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
- (match_operator:<V_cmp_result> 1 "comparison_operator"
- [(match_operand:VDQW 2 "s_register_operand")
- (match_operand:VDQW 3 "reg_or_zero_operand")]))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
- arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false);
- DONE;
-})
-
-(define_expand "vec_cmpu<mode><mode>"
- [(set (match_operand:VDQIW 0 "s_register_operand")
- (match_operator:VDQIW 1 "comparison_operator"
- [(match_operand:VDQIW 2 "s_register_operand")
- (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
- "TARGET_NEON"
-{
- arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
- operands[2], operands[3], false);
- DONE;
-})
-
-;; Conditional instructions. These are comparisons with conditional moves for
-;; vectors. They perform the assignment:
-;;
-;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
-;;
-;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
-;; element-wise.
-
-(define_expand "vcond<mode><mode>"
- [(set (match_operand:VDQW 0 "s_register_operand")
- (if_then_else:VDQW
- (match_operator 3 "comparison_operator"
- [(match_operand:VDQW 4 "s_register_operand")
- (match_operand:VDQW 5 "reg_or_zero_operand")])
- (match_operand:VDQW 1 "s_register_operand")
- (match_operand:VDQW 2 "s_register_operand")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
- arm_expand_vcond (operands, <V_cmp_result>mode);
- DONE;
-})
-
-(define_expand "vcond<V_cvtto><mode>"
- [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
- (if_then_else:<V_CVTTO>
- (match_operator 3 "comparison_operator"
- [(match_operand:V32 4 "s_register_operand")
- (match_operand:V32 5 "reg_or_zero_operand")])
- (match_operand:<V_CVTTO> 1 "s_register_operand")
- (match_operand:<V_CVTTO> 2 "s_register_operand")))]
- "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
-{
- arm_expand_vcond (operands, <V_cmp_result>mode);
- DONE;
-})
-
-(define_expand "vcondu<mode><v_cmp_result>"
- [(set (match_operand:VDQW 0 "s_register_operand")
- (if_then_else:VDQW
- (match_operator 3 "arm_comparison_operator"
- [(match_operand:<V_cmp_result> 4 "s_register_operand")
- (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
- (match_operand:VDQW 1 "s_register_operand")
- (match_operand:VDQW 2 "s_register_operand")))]
- "TARGET_NEON"
-{
- arm_expand_vcond (operands, <V_cmp_result>mode);
- DONE;
-})
-
-(define_expand "vcond_mask_<mode><v_cmp_result>"
- [(set (match_operand:VDQW 0 "s_register_operand")
- (if_then_else:VDQW
- (match_operand:<V_cmp_result> 3 "s_register_operand")
- (match_operand:VDQW 1 "s_register_operand")
- (match_operand:VDQW 2 "s_register_operand")))]
- "TARGET_NEON"
-{
- emit_insn (gen_neon_vbsl<mode> (operands[0], operands[3], operands[1],
- operands[2]));
- DONE;
-})
-
;; Patterns for builtins.
; good for plain vadd, vaddq.
diff --git a/gcc/config/arm/unspecs.md b/gcc/config/arm/unspecs.md
index 07ca53b8b0b..0778db1bf4f 100644
--- a/gcc/config/arm/unspecs.md
+++ b/gcc/config/arm/unspecs.md
@@ -596,8 +596,6 @@
VCVTQ_N_FROM_F_U
VADDLVQ_P_S
VADDLVQ_P_U
- VCMPNEQ_U
- VCMPNEQ_S
VSHLQ_S
VSHLQ_U
VABDQ_S
@@ -605,9 +603,6 @@
VADDVAQ_S
VADDVQ_P_S
VBRSRQ_N_S
- VCMPEQQ_S
- VCMPEQQ_N_S
- VCMPNEQ_N_S
VHADDQ_S
VHADDQ_N_S
VHSUBQ_S
@@ -645,9 +640,6 @@
VADDVAQ_U
VADDVQ_P_U
VBRSRQ_N_U
- VCMPEQQ_U
- VCMPEQQ_N_U
- VCMPNEQ_N_U
VHADDQ_U
VHADDQ_N_U
VHSUBQ_U
@@ -680,14 +672,6 @@
VSHLQ_R_U
VSUBQ_U
VSUBQ_N_U
- VCMPGEQ_N_S
- VCMPGEQ_S
- VCMPGTQ_N_S
- VCMPGTQ_S
- VCMPLEQ_N_S
- VCMPLEQ_S
- VCMPLTQ_N_S
- VCMPLTQ_S
VHCADDQ_ROT270_S
VHCADDQ_ROT90_S
VMAXAQ_S
@@ -702,10 +686,6 @@
VQRDMULHQ_N_S
VQRDMULHQ_S
VQSHLUQ_N_S
- VCMPCSQ_N_U
- VCMPCSQ_U
- VCMPHIQ_N_U
- VCMPHIQ_U
VABDQ_M_S
VABDQ_M_U
VABDQ_F
diff --git a/gcc/config/arm/vec-common.md b/gcc/config/arm/vec-common.md
index 0b2b3b1c9ce..448731f7be9 100644
--- a/gcc/config/arm/vec-common.md
+++ b/gcc/config/arm/vec-common.md
@@ -362,3 +362,111 @@
DONE;
}
})
+
+(define_expand "vec_cmp<mode><v_cmp_result>"
+ [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
+ (match_operator:<V_cmp_result> 1 "comparison_operator"
+ [(match_operand:VDQW 2 "s_register_operand")
+ (match_operand:VDQW 3 "reg_or_zero_operand")]))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false, false);
+ DONE;
+})
+
+(define_expand "vec_cmpu<mode><mode>"
+ [(set (match_operand:VDQIW 0 "s_register_operand")
+ (match_operator:VDQIW 1 "comparison_operator"
+ [(match_operand:VDQIW 2 "s_register_operand")
+ (match_operand:VDQIW 3 "reg_or_zero_operand")]))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT"
+{
+ arm_expand_vector_compare (operands[0], GET_CODE (operands[1]),
+ operands[2], operands[3], false, false);
+ DONE;
+})
+
+;; Conditional instructions. These are comparisons with conditional moves for
+;; vectors. They perform the assignment:
+;;
+;; Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
+;;
+;; where op3 is <, <=, ==, !=, >= or >. Operations are performed
+;; element-wise.
+
+(define_expand "vcond<mode><mode>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
+ (match_operator 3 "comparison_operator"
+ [(match_operand:VDQW 4 "s_register_operand")
+ (match_operand:VDQW 5 "reg_or_zero_operand")])
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
+
+(define_expand "vcond<V_cvtto><mode>"
+ [(set (match_operand:<V_CVTTO> 0 "s_register_operand")
+ (if_then_else:<V_CVTTO>
+ (match_operator 3 "comparison_operator"
+ [(match_operand:V32 4 "s_register_operand")
+ (match_operand:V32 5 "reg_or_zero_operand")])
+ (match_operand:<V_CVTTO> 1 "s_register_operand")
+ (match_operand:<V_CVTTO> 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
+
+(define_expand "vcondu<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
+ (match_operator 3 "arm_comparison_operator"
+ [(match_operand:<V_cmp_result> 4 "s_register_operand")
+ (match_operand:<V_cmp_result> 5 "reg_or_zero_operand")])
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT"
+{
+ arm_expand_vcond (operands, <V_cmp_result>mode);
+ DONE;
+})
+
+(define_expand "vcond_mask_<mode><v_cmp_result>"
+ [(set (match_operand:VDQW 0 "s_register_operand")
+ (if_then_else:VDQW
+ (match_operand:<V_cmp_result> 3 "s_register_operand")
+ (match_operand:VDQW 1 "s_register_operand")
+ (match_operand:VDQW 2 "s_register_operand")))]
+ "ARM_HAVE_<MODE>_ARITH
+ && !TARGET_REALLY_IWMMXT
+ && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
+{
+ /* Float modes use the _f vpsel pattern, as in arm_expand_vcond. NOTE(review): confirm operands[3] is a valid vpsel predicate. */
+ if (TARGET_NEON)
+ emit_insn (gen_neon_vbsl (<MODE>mode, operands[0], operands[3],
+ operands[1], operands[2]));
+ else if (TARGET_HAVE_MVE && <Is_float_mode>)
+ emit_insn (gen_mve_vpselq_f (<MODE>mode, operands[0],
+ operands[1], operands[2], operands[3]));
+ else if (TARGET_HAVE_MVE)
+ emit_insn (gen_mve_vpselq (VPSELQ_S, <MODE>mode, operands[0],
+ operands[1], operands[2], operands[3]));
+ else
+ gcc_unreachable ();
+
+ DONE;
+})
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c
new file mode 100644
index 00000000000..029c931f47f
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-1.c
@@ -0,0 +1,80 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+/* Integer tests. */
+
+#define COMPARE_REG(NAME, OP, TYPE) \
+ TYPE \
+ cmp_##NAME##_##TYPE##_reg (TYPE a, TYPE b) \
+ { \
+ return a OP b; \
+ }
+
+#define COMPARE_REG_AND_ZERO(NAME, OP, TYPE) \
+ COMPARE_REG (NAME, OP, TYPE) \
+ \
+ TYPE \
+ cmp_##NAME##_##TYPE##_zero (TYPE a) \
+ { \
+ return a OP (TYPE) {}; \
+ }
+
+#define COMPARE_TYPE(TYPE, COMPARE_ORDERED) \
+ COMPARE_REG_AND_ZERO (eq, ==, TYPE) \
+ COMPARE_REG_AND_ZERO (ne, !=, TYPE) \
+ COMPARE_ORDERED (lt, <, TYPE) \
+ COMPARE_ORDERED (le, <=, TYPE) \
+ COMPARE_ORDERED (gt, >, TYPE) \
+ COMPARE_ORDERED (ge, >=, TYPE)
+
+#define TEST_TYPE(NAME, ELEM, COMPARE_ORDERED, SIZE) \
+ typedef ELEM NAME##SIZE __attribute__((vector_size(SIZE))); \
+ COMPARE_TYPE (NAME##SIZE, COMPARE_ORDERED)
+
+/* 64-bit vectors, not vectorized. */
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 8)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 8)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 8)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 8)
+
+/* 128-bit vectors. */
+TEST_TYPE (vs8, __INT8_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu8, __UINT8_TYPE__, COMPARE_REG, 16)
+TEST_TYPE (vs16, __INT16_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu16, __UINT16_TYPE__, COMPARE_REG, 16)
+TEST_TYPE (vs32, __INT32_TYPE__, COMPARE_REG_AND_ZERO, 16)
+TEST_TYPE (vu32, __UINT32_TYPE__, COMPARE_REG, 16)
+
+/* { 8 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i8 eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i8 ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8 cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 16 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i16 eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i16 ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16 cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 32 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i32 eq, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i32 ne, q[0-9]+, q[0-9]+\n} 4 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 lt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 le, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 gt, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 ge, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32 cs, q[0-9]+, q[0-9]+\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c
new file mode 100644
index 00000000000..8515195ec87
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-2.c
@@ -0,0 +1,38 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+
+/* float 32 tests. */
+
+#ifndef ELEM_TYPE
+#define ELEM_TYPE float
+#endif
+#ifndef INT_ELEM_TYPE
+#define INT_ELEM_TYPE __INT32_TYPE__
+#endif
+
+#define COMPARE(NAME, OP) \
+ int_vec \
+ cmp_##NAME##_reg (vec a, vec b) \
+ { \
+ return a OP b; \
+ }
+
+typedef INT_ELEM_TYPE int_vec __attribute__((vector_size(16)));
+typedef ELEM_TYPE vec __attribute__((vector_size(16)));
+
+COMPARE (eq, ==)
+COMPARE (ne, !=)
+COMPARE (lt, <)
+COMPARE (le, <=)
+COMPARE (gt, >)
+COMPARE (ge, >=)
+
+/* eq, ne, lt, le, gt, ge.  */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\teq, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tne, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tlt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tle, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tgt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tge, q[0-9]+, q[0-9]+\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c b/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c
new file mode 100644
index 00000000000..77749723693
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-compare-scalar-1.c
@@ -0,0 +1,69 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#define COMPARE_REG(NAME, OP, TYPE, SCALAR) \
+ TYPE \
+ cmp_##NAME##_##TYPE##_scalar (TYPE a, SCALAR b) \
+ { \
+ return a OP b; \
+ }
+
+#define COMPARE_TYPE(SCALAR, TYPE) \
+ COMPARE_REG (eq, ==, TYPE, SCALAR) \
+ COMPARE_REG (ne, !=, TYPE, SCALAR) \
+ COMPARE_REG (lt, <, TYPE, SCALAR) \
+ COMPARE_REG (le, <=, TYPE, SCALAR) \
+ COMPARE_REG (gt, >, TYPE, SCALAR) \
+ COMPARE_REG (ge, >=, TYPE, SCALAR)
+
+#define TEST_TYPE(NAME, ELEM, SIZE) \
+ typedef ELEM NAME##SIZE __attribute__((vector_size(SIZE))); \
+ COMPARE_TYPE (ELEM, NAME##SIZE)
+
+/* 64-bit vectors, not vectorized. */
+TEST_TYPE (vs8, __INT8_TYPE__, 8)
+TEST_TYPE (vu8, __UINT8_TYPE__, 8)
+TEST_TYPE (vs16, __INT16_TYPE__, 8)
+TEST_TYPE (vu16, __UINT16_TYPE__, 8)
+TEST_TYPE (vs32, __INT32_TYPE__, 8)
+TEST_TYPE (vu32, __UINT32_TYPE__, 8)
+
+/* 128-bit vectors. */
+TEST_TYPE (vs8, __INT8_TYPE__, 16)
+TEST_TYPE (vu8, __UINT8_TYPE__, 16)
+TEST_TYPE (vs16, __INT16_TYPE__, 16)
+TEST_TYPE (vu16, __UINT16_TYPE__, 16)
+TEST_TYPE (vs32, __INT32_TYPE__, 16)
+TEST_TYPE (vu32, __UINT32_TYPE__, 16)
+
+/* { 8 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i8 eq, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i8 ne, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 lt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 le, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 gt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s8 ge, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u8 cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 16 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i16 eq, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i16 ne, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 lt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 le, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 gt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s16 ge, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u16 cs, q[0-9]+, q[0-9]+\n} 2 } } */
+
+/* { 32 bits } x { eq, ne, lt, le, gt, ge, hi, cs }.  */
+/* { dg-final { scan-assembler-times {\tvcmp.i32 eq, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i32 ne, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 lt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 le, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 gt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s32 ge, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32 hi, q[0-9]+, q[0-9]+\n} 2 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u32 cs, q[0-9]+, q[0-9]+\n} 2 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c
new file mode 100644
index 00000000000..4ed449e7d52
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp-f32.c
@@ -0,0 +1,30 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_fp_ok } */
+/* { dg-add-options arm_v8_1m_mve_fp } */
+/* { dg-additional-options "-O3 -funsafe-math-optimizations" } */
+
+#include <stdint.h>
+
+#define NB 4
+
+#define FUNC(OP, NAME) \
+ void test_ ## NAME ##_f (float * __restrict__ dest, float *a, float *b) { \
+ int i; \
+ for (i=0; i<NB; i++) { \
+ dest[i] = a[i] OP b[i]; \
+ } \
+ }
+
+FUNC(==, vcmpeq)
+FUNC(!=, vcmpne)
+FUNC(<, vcmplt)
+FUNC(<=, vcmple)
+FUNC(>, vcmpgt)
+FUNC(>=, vcmpge)
+
+/* { dg-final { scan-assembler-times {\tvcmp.f32\teq, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tne, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tlt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tle, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tgt, q[0-9]+, q[0-9]+\n} 1 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.f32\tge, q[0-9]+, q[0-9]+\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c
new file mode 100644
index 00000000000..8da15e762eb
--- /dev/null
+++ b/gcc/testsuite/gcc.target/arm/simd/mve-vcmp.c
@@ -0,0 +1,50 @@
+/* { dg-do assemble } */
+/* { dg-require-effective-target arm_v8_1m_mve_ok } */
+/* { dg-add-options arm_v8_1m_mve } */
+/* { dg-additional-options "-O3" } */
+
+#include <stdint.h>
+
+#define FUNC(SIGN, TYPE, BITS, NB, OP, NAME) \
+ void test_ ## NAME ##_ ## SIGN ## BITS ## x ## NB (TYPE##BITS##_t * __restrict__ dest, TYPE##BITS##_t *a, TYPE##BITS##_t *b) { \
+ int i; \
+ for (i=0; i<NB; i++) { \
+ dest[i] = a[i] OP b[i]; \
+ } \
+}
+
+#define ALL_FUNCS(OP, NAME) \
+ FUNC(s, int, 32, 2, OP, NAME) \
+ FUNC(u, uint, 32, 2, OP, NAME) \
+ FUNC(s, int, 16, 4, OP, NAME) \
+ FUNC(u, uint, 16, 4, OP, NAME) \
+ FUNC(s, int, 8, 8, OP, NAME) \
+ FUNC(u, uint, 8, 8, OP, NAME) \
+ FUNC(s, int, 32, 4, OP, NAME) \
+ FUNC(u, uint, 32, 4, OP, NAME) \
+ FUNC(s, int, 16, 8, OP, NAME) \
+ FUNC(u, uint, 16, 8, OP, NAME) \
+ FUNC(s, int, 8, 16, OP, NAME) \
+ FUNC(u, uint, 8, 16, OP, NAME)
+
+ALL_FUNCS(==, vcmpeq)
+ALL_FUNCS(!=, vcmpne)
+ALL_FUNCS(<, vcmplt)
+ALL_FUNCS(<=, vcmple)
+ALL_FUNCS(>, vcmpgt)
+ALL_FUNCS(>=, vcmpge)
+
+/* MVE has only 128-bit vectors, so we can vectorize only half of the
+ functions above. */
+/* { dg-final { scan-assembler-times {\tvcmp.i[0-9]+ eq, q[0-9]+, q[0-9]+\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.i[0-9]+ ne, q[0-9]+, q[0-9]+\n} 6 } } */
+
+/* lt, le, gt, ge apply to signed types, cs and hi to unsigned types. */
+/* lt and le with unsigned types are replaced with the opposite condition, hence
+ the double number of matches for cs and hi. */
+/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ lt, q[0-9]+, q[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ le, q[0-9]+, q[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ gt, q[0-9]+, q[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.s[0-9]+ ge, q[0-9]+, q[0-9]+\n} 3 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u[0-9]+ cs, q[0-9]+, q[0-9]+\n} 6 } } */
+/* { dg-final { scan-assembler-times {\tvcmp.u[0-9]+ hi, q[0-9]+, q[0-9]+\n} 6 } } */