summaryrefslogtreecommitdiff
path: root/libavcodec/x86/fft.asm
diff options
context:
space:
mode:
authorDiego Biurrun <diego@biurrun.de>2012-07-15 20:11:55 +0200
committerDiego Biurrun <diego@biurrun.de>2017-03-14 17:23:32 +0100
commit681a86aba6cb09b98ad716d986182060c7795d20 (patch)
tree5f2fd54c9772deee816a7b57d04a7dea8da6da41 /libavcodec/x86/fft.asm
parente9bb77fb1012cba1951a82136df7071f71bce8fb (diff)
downloadffmpeg-681a86aba6cb09b98ad716d986182060c7795d20.tar.gz
x86: fft: Port to cpuflags
Diffstat (limited to 'libavcodec/x86/fft.asm')
-rw-r--r--libavcodec/x86/fft.asm94
1 files changed, 43 insertions, 51 deletions
diff --git a/libavcodec/x86/fft.asm b/libavcodec/x86/fft.asm
index 2c7438134c..63e92f7159 100644
--- a/libavcodec/x86/fft.asm
+++ b/libavcodec/x86/fft.asm
@@ -190,6 +190,23 @@ SECTION .text
addps %2, %2, %5 ; {i0,i1,i2,i3}
%endmacro
+%macro INTERL 5
+%if cpuflag(avx)
+ vunpckhps %3, %2, %1
+ vunpcklps %2, %2, %1
+ vextractf128 %4(%5), %2, 0
+ vextractf128 %4 %+ H(%5), %3, 0
+ vextractf128 %4(%5 + 1), %2, 1
+ vextractf128 %4 %+ H(%5 + 1), %3, 1
+%elif cpuflag(sse)
+ mova %3, %2
+ unpcklps %2, %1
+ unpckhps %3, %1
+ mova %4(%5), %2
+ mova %4(%5+1), %3
+%endif
+%endmacro
+
; scheduled for cpu-bound sizes
%macro PASS_SMALL 3 ; (to load m4-m7), wre, wim
IF%1 mova m4, Z(4)
@@ -536,17 +553,6 @@ DEFINE_ARGS zc, w, n, o1, o3
INIT_YMM avx
-%macro INTERL_AVX 5
- vunpckhps %3, %2, %1
- vunpcklps %2, %2, %1
- vextractf128 %4(%5), %2, 0
- vextractf128 %4 %+ H(%5), %3, 0
- vextractf128 %4(%5 + 1), %2, 1
- vextractf128 %4 %+ H(%5 + 1), %3, 1
-%endmacro
-
-%define INTERL INTERL_AVX
-
DECL_PASS pass_avx, PASS_BIG 1
DECL_PASS pass_interleave_avx, PASS_BIG 0
@@ -560,16 +566,6 @@ cglobal fft_calc, 2,5,8
INIT_XMM sse
-%macro INTERL_SSE 5
- mova %3, %2
- unpcklps %2, %1
- unpckhps %3, %1
- mova %4(%5), %2
- mova %4(%5+1), %3
-%endmacro
-
-%define INTERL INTERL_SSE
-
DECL_PASS pass_sse, PASS_BIG 1
DECL_PASS pass_interleave_sse, PASS_BIG 0
@@ -855,16 +851,30 @@ INIT_XMM sse
%endmacro
%macro CMUL 6 ;j, xmm0, xmm1, 3, 4, 5
+%if cpuflag(sse)
mulps m6, %3, [%5+%1]
mulps m7, %2, [%5+%1]
mulps %2, %2, [%6+%1]
mulps %3, %3, [%6+%1]
subps %2, %2, m6
addps %3, %3, m7
+%elif cpuflag(3dnow)
+ mova m6, [%1+%2*2]
+ mova %3, [%1+%2*2+8]
+ mova %4, m6
+ mova m7, %3
+ pfmul m6, [%5+%2]
+ pfmul %3, [%6+%2]
+ pfmul %4, [%6+%2]
+ pfmul m7, [%5+%2]
+ pfsub %3, m6
+ pfadd %4, m7
+%endif
%endmacro
-%macro POSROTATESHUF_AVX 5 ;j, k, z+n8, tcos+n8, tsin+n8
+%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
.post:
+%if cpuflag(avx)
vmovaps ymm1, [%3+%1*2]
vmovaps ymm0, [%3+%1*2+0x20]
vmovaps ymm3, [%3+%2*2]
@@ -893,10 +903,7 @@ INIT_XMM sse
sub %2, 0x20
add %1, 0x20
jl .post
-%endmacro
-
-%macro POSROTATESHUF 5 ;j, k, z+n8, tcos+n8, tsin+n8
-.post:
+%elif cpuflag(sse)
movaps xmm1, [%3+%1*2]
movaps xmm0, [%3+%1*2+0x10]
CMUL %1, xmm0, xmm1, %3, %4, %5
@@ -918,25 +925,9 @@ INIT_XMM sse
sub %2, 0x10
add %1, 0x10
jl .post
-%endmacro
-
-%macro CMUL_3DNOW 6
- mova m6, [%1+%2*2]
- mova %3, [%1+%2*2+8]
- mova %4, m6
- mova m7, %3
- pfmul m6, [%5+%2]
- pfmul %3, [%6+%2]
- pfmul %4, [%6+%2]
- pfmul m7, [%5+%2]
- pfsub %3, m6
- pfadd %4, m7
-%endmacro
-
-%macro POSROTATESHUF_3DNOW 5 ;j, k, z+n8, tcos+n8, tsin+n8
-.post:
- CMUL_3DNOW %3, %1, m0, m1, %4, %5
- CMUL_3DNOW %3, %2, m2, m3, %4, %5
+%elif cpuflag(3dnow)
+ CMUL %3, %1, m0, m1, %4, %5
+ CMUL %3, %2, m2, m3, %4, %5
movd [%3+%1*2+ 0], m0
movd [%3+%2*2+12], m1
movd [%3+%2*2+ 0], m2
@@ -952,9 +943,10 @@ INIT_XMM sse
sub %2, 8
add %1, 8
jl .post
+%endif
%endmacro
-%macro DECL_IMDCT 1
+%macro DECL_IMDCT 0
cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *input
%if ARCH_X86_64
%define rrevtab r7
@@ -1060,7 +1052,7 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
neg r0
mov r1, -mmsize
sub r1, r0
- %1 r0, r1, r6, rtcos, rtsin
+ POSROTATESHUF r0, r1, r6, rtcos, rtsin
%if ARCH_X86_64 == 0
add esp, 12
%endif
@@ -1070,17 +1062,17 @@ cglobal imdct_half, 3,12,8; FFTContext *s, FFTSample *output, const FFTSample *i
RET
%endmacro
-DECL_IMDCT POSROTATESHUF
+DECL_IMDCT
%if ARCH_X86_32
INIT_MMX 3dnow
-DECL_IMDCT POSROTATESHUF_3DNOW
+DECL_IMDCT
INIT_MMX 3dnowext
-DECL_IMDCT POSROTATESHUF_3DNOW
+DECL_IMDCT
%endif
INIT_YMM avx
-DECL_IMDCT POSROTATESHUF_AVX
+DECL_IMDCT
%endif ; CONFIG_MDCT