diff options
author | Justin Ruggles <justin.ruggles@gmail.com> | 2012-06-18 23:39:14 -0400 |
---|---|---|
committer | Justin Ruggles <justin.ruggles@gmail.com> | 2012-07-27 11:25:48 -0400 |
commit | 79687079a97a039c325ab79d7a95920d800b791f (patch) | |
tree | b9a7056666e923047c1f0722f4e0e6dafa35ad8d | |
parent | 0cf7d849ffcd82ef4b8279b0a5aec306c29717f7 (diff) | |
download | ffmpeg-79687079a97a039c325ab79d7a95920d800b791f.tar.gz |
x86: add support for fmaddps fma4 instruction with abstraction to avx/sse
-rwxr-xr-x | configure | 5 | ||||
-rw-r--r-- | libavutil/x86/x86inc.asm | 16 |
2 files changed, 16 insertions, 5 deletions
```diff
@@ -242,6 +242,7 @@ Optimization options (experts only):
 --disable-sse disable SSE optimizations
 --disable-ssse3 disable SSSE3 optimizations
 --disable-avx disable AVX optimizations
+ --disable-fma4 disable FMA4 optimizations
 --disable-armv5te disable armv5te optimizations
 --disable-armv6 disable armv6 optimizations
 --disable-armv6t2 disable armv6t2 optimizations
@@ -1047,6 +1048,7 @@ ARCH_EXT_LIST='
 armv6t2
 armvfp
 avx
+ fma4
 mmi
 mmx
 mmx2
@@ -1295,6 +1297,7 @@ mmx2_deps="mmx"
 sse_deps="mmx"
 ssse3_deps="sse"
 avx_deps="ssse3"
+fma4_deps="avx"
 aligned_stack_if_any="ppc x86"
 fast_64bit_if_any="alpha ia64 mips64 parisc64 ppc64 sparc64 x86_64"
@@ -2865,6 +2868,7 @@ EOF
 check_yasm "pextrd [eax], xmm0, 1" && enable yasm ||
 die "yasm not found, use --disable-yasm for a crippled build"
 check_yasm "vextractf128 xmm0, ymm0, 0" || disable avx
+ check_yasm "vfmaddps ymm0, ymm1, ymm2, ymm3" || disable fma4
 fi
 case "$cpu" in
@@ -3292,6 +3296,7 @@ if enabled x86; then
 echo "SSE enabled ${sse-no}"
 echo "SSSE3 enabled ${ssse3-no}"
 echo "AVX enabled ${avx-no}"
+ echo "FMA4 enabled ${fma4-no}"
 echo "CMOV enabled ${cmov-no}"
 echo "CMOV is fast ${fast_cmov-no}"
 echo "EBX available ${ebx_available-no}"
diff --git a/libavutil/x86/x86inc.asm b/libavutil/x86/x86inc.asm
index b76a10ca5e..4b4a19b208 100644
--- a/libavutil/x86/x86inc.asm
+++ b/libavutil/x86/x86inc.asm
@@ -1093,16 +1093,22 @@ AVX_INSTR pfmul, 1, 0, 1
 %undef j
 %macro FMA_INSTR 3
- %macro %1 4-7 %1, %2, %3
- %if cpuflag(xop)
- v%5 %1, %2, %3, %4
+ %macro %1 5-8 %1, %2, %3
+ %if cpuflag(xop) || cpuflag(fma4)
+ v%6 %1, %2, %3, %4
 %else
- %6 %1, %2, %3
- %7 %1, %4
+ %ifidn %1, %4
+ %7 %5, %2, %3
+ %8 %1, %4, %5
+ %else
+ %7 %1, %2, %3
+ %8 %1, %4
+ %endif
 %endif
 %endmacro
 %endmacro
+FMA_INSTR fmaddps, mulps, addps
 FMA_INSTR pmacsdd, pmulld, paddd
 FMA_INSTR pmacsww, pmullw, paddw
 FMA_INSTR pmadcswd, pmaddwd, paddd
```