summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Uros Bizjak <ubizjak@gmail.com> 2023-05-12 18:37:13 +0200
committer: Uros Bizjak <ubizjak@gmail.com> 2023-05-12 18:39:32 +0200
commit: 75aa95694ed9bab50b5e152f7a755eb5f5e8014e (patch)
tree: 84f6abb97498a7545ba4b7532cfe08c0fd6c28ae
parent: a835f046cdf017b9e8ad5576df4f10daaf8420d0 (diff)
download: gcc-75aa95694ed9bab50b5e152f7a755eb5f5e8014e.tar.gz
i386: Remove mulv2si emulated sequence for TARGET_SSE2 [PR109797]
Remove mulv2si emulated sequence for TARGET_SSE2 and enable only the native PMULLD instruction for TARGET_SSE4_1.

Ideally, the vectorization for TARGET_SSE2 should depend on more precise cost estimation (the PR contains a patch for ix86_multiplication_cost), but even with the patched cost function the runtime regression was not fixed.

PR target/109797

gcc/ChangeLog:

* config/i386/mmx.md (mulv2si3): Remove expander.
(mulv2si3): Rename insn pattern from *mulv2si3.
-rw-r--r--gcc/config/i386/mmx.md34
1 files changed, 1 insertions, 33 deletions
diff --git a/gcc/config/i386/mmx.md b/gcc/config/i386/mmx.md
index e7ca921dd2b..b2954fff8ae 100644
--- a/gcc/config/i386/mmx.md
+++ b/gcc/config/i386/mmx.md
@@ -2092,39 +2092,7 @@
(set_attr "type" "sseadd")
(set_attr "mode" "TI")])
-(define_expand "mulv2si3"
- [(set (match_operand:V2SI 0 "register_operand")
- (mult:V2SI
- (match_operand:V2SI 1 "register_operand")
- (match_operand:V2SI 2 "register_operand")))]
- "TARGET_MMX_WITH_SSE"
-{
- if (!TARGET_SSE4_1)
- {
- rtx op1 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[1]),
- V2SImode);
- rtx op2 = lowpart_subreg (V4SImode, force_reg (V2SImode, operands[2]),
- V2SImode);
-
- rtx tmp1 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (tmp1, op1, op1));
- rtx tmp2 = gen_reg_rtx (V4SImode);
- emit_insn (gen_vec_interleave_lowv4si (tmp2, op2, op2));
-
- rtx res = gen_reg_rtx (V2DImode);
- emit_insn (gen_vec_widen_umult_even_v4si (res, tmp1, tmp2));
-
- rtx op0 = gen_reg_rtx (V4SImode);
- emit_insn (gen_sse2_pshufd_1 (op0, gen_lowpart (V4SImode, res),
- const0_rtx, const2_rtx,
- const0_rtx, const2_rtx));
-
- emit_move_insn (operands[0], lowpart_subreg (V2SImode, op0, V4SImode));
- DONE;
- }
-})
-
-(define_insn "*mulv2si3"
+(define_insn "mulv2si3"
[(set (match_operand:V2SI 0 "register_operand" "=Yr,*x,v")
(mult:V2SI
(match_operand:V2SI 1 "register_operand" "%0,0,v")