summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorUros Bizjak <ubizjak@gmail.com>2023-05-17 20:25:32 +0200
committerUros Bizjak <ubizjak@gmail.com>2023-05-17 20:26:08 +0200
commit3da8f61fe2b2cff66e37b972ca5da462a0841c29 (patch)
tree122155032c0899a91c70fb68de20defaa5427d51
parentf5b246ce5fd95e721f0f418633964f466448d2ae (diff)
downloadgcc-3da8f61fe2b2cff66e37b972ca5da462a0841c29.tar.gz
i386: Adjust emulated integer vector mode multiplication costs
Returned integer vector mode costs of emulated modes in ix86_multiplication_cost are wrong and do not reflect generated instruction sequences. Rewrite handling of different integer vector modes and different target ABIs to return real instruction counts in order to calculate better costs of various emulated modes. gcc/ChangeLog: * config/i386/i386.cc (ix86_multiplication_cost): Correct calculation of integer vector mode costs to reflect generated instruction sequences of different integer vector modes and different target ABIs.
-rw-r--r--gcc/config/i386/i386.cc72
1 files changed, 44 insertions, 28 deletions
diff --git a/gcc/config/i386/i386.cc b/gcc/config/i386/i386.cc
index 498fac468b5..9ab24242b59 100644
--- a/gcc/config/i386/i386.cc
+++ b/gcc/config/i386/i386.cc
@@ -20463,36 +20463,52 @@ ix86_multiplication_cost (const struct processor_costs *cost,
return ix86_vec_cost (mode,
inner_mode == DFmode ? cost->mulsd : cost->mulss);
else if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
- {
- /* vpmullq is used in this case. No emulation is needed. */
- if (TARGET_AVX512DQ)
- return ix86_vec_cost (mode, cost->mulss);
+ switch (mode)
+ {
+ case V16QImode:
+ /* V*QImode is emulated with 4-11 insns. */
+ if (TARGET_AVX512BW && TARGET_AVX512VL)
+ return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+ else if (TARGET_XOP)
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
+ /* FALLTHRU */
+ case V32QImode:
+ if (TARGET_AVX512BW && mode == V32QImode)
+ return ix86_vec_cost (mode, cost->mulss + cost->sse_op * 3);
+ else
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 7);
- /* V*QImode is emulated with 7-13 insns. */
- if (mode == V16QImode || mode == V32QImode)
- {
- int extra = 11;
- if (TARGET_XOP && mode == V16QImode)
- extra = 5;
- else if (TARGET_SSSE3)
- extra = 6;
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * extra);
- }
- /* V*DImode is emulated with 5-8 insns. */
- else if (mode == V2DImode || mode == V4DImode)
- {
- if (TARGET_XOP && mode == V2DImode)
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 3);
- else
- return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
- }
- /* Without sse4.1, we don't have PMULLD; it's emulated with 7
- insns, including two PMULUDQ. */
- else if (mode == V4SImode && !(TARGET_SSE4_1 || TARGET_AVX))
- return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
- else
+ case V64QImode:
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 9);
+
+ case V4SImode:
+ /* pmulld is used in this case. No emulation is needed. */
+ if (TARGET_SSE4_1)
+ goto do_native;
+ /* V4SImode is emulated with 7 insns. */
+ else
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 5);
+
+ case V2DImode:
+ case V4DImode:
+ /* vpmullq is used in this case. No emulation is needed. */
+ if (TARGET_AVX512DQ && TARGET_AVX512VL)
+ goto do_native;
+ /* V*DImode is emulated with 6-8 insns. */
+ else if (TARGET_XOP && mode == V2DImode)
+ return ix86_vec_cost (mode, cost->mulss * 2 + cost->sse_op * 4);
+ /* FALLTHRU */
+ case V8DImode:
+ /* vpmullq is used in this case. No emulation is needed. */
+ if (TARGET_AVX512DQ && mode == V8DImode)
+ goto do_native;
+ else
+ return ix86_vec_cost (mode, cost->mulss * 3 + cost->sse_op * 5);
+
+ default:
+ do_native:
return ix86_vec_cost (mode, cost->mulss);
- }
+ }
else
return (cost->mult_init[MODE_INDEX (mode)] + cost->mult_bit * 7);
}