diff options
author | James Darnley <jdarnley@obe.tv> | 2022-12-14 17:16:28 +0100 |
---|---|---|
committer | James Darnley <jdarnley@obe.tv> | 2022-12-20 15:02:45 +0100 |
commit | 6af453ca389c56cb113876628cb173577faa9464 (patch) | |
tree | 168dc608e5811b6d38ab8bfb61f0335524065828 /libavcodec/x86/v210-init.c | |
parent | f30b4c2f47ab689a570a9b36b2e96e78bb462691 (diff) | |
download | ffmpeg-6af453ca389c56cb113876628cb173577faa9464.tar.gz |
avcodec/x86: add avx512icl function for v210dec
Ice Lake (Xeon Silver 4316): 2.01x faster (1147±36.8 vs. 571±38.2 decicycles) compared with avx2
Diffstat (limited to 'libavcodec/x86/v210-init.c')
-rw-r--r-- | libavcodec/x86/v210-init.c | 10 |
1 file changed, 9 insertions, 1 deletion
```diff
diff --git a/libavcodec/x86/v210-init.c b/libavcodec/x86/v210-init.c
index 5db1fef98c..8b3677b8aa 100644
--- a/libavcodec/x86/v210-init.c
+++ b/libavcodec/x86/v210-init.c
@@ -17,7 +17,7 @@
  */
 
 #include "libavutil/attributes.h"
-#include "libavutil/cpu.h"
+#include "libavutil/x86/cpu.h"
 #include "libavcodec/v210dec.h"
 
 extern void ff_v210_planar_unpack_unaligned_ssse3(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
@@ -28,6 +28,8 @@ extern void ff_v210_planar_unpack_aligned_ssse3(const uint32_t *src, uint16_t *y
 extern void ff_v210_planar_unpack_aligned_avx(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 extern void ff_v210_planar_unpack_aligned_avx2(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
 
+extern void ff_v210_planar_unpack_avx512icl(const uint32_t *src, uint16_t *y, uint16_t *u, uint16_t *v, int width);
+
 av_cold void ff_v210_x86_init(V210DecContext *s)
 {
 #if HAVE_X86ASM
@@ -42,6 +44,9 @@ av_cold void ff_v210_x86_init(V210DecContext *s)
 
         if (HAVE_AVX2_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX2)
             s->unpack_frame = ff_v210_planar_unpack_aligned_avx2;
+
+        if (EXTERNAL_AVX512ICL(cpu_flags))
+            s->unpack_frame = ff_v210_planar_unpack_avx512icl;
     }
     else {
         if (cpu_flags & AV_CPU_FLAG_SSSE3)
@@ -52,6 +57,9 @@ av_cold void ff_v210_x86_init(V210DecContext *s)
 
         if (HAVE_AVX2_EXTERNAL && cpu_flags & AV_CPU_FLAG_AVX2)
             s->unpack_frame = ff_v210_planar_unpack_unaligned_avx2;
+
+        if (EXTERNAL_AVX512ICL(cpu_flags))
+            s->unpack_frame = ff_v210_planar_unpack_avx512icl;
     }
 #endif
 }
```