diff options
-rw-r--r-- | libavcodec/fft-internal.h | 14 | ||||
-rw-r--r-- | libavcodec/fft.c | 3 | ||||
-rw-r--r-- | libavcodec/fft.h | 5 | ||||
-rw-r--r-- | libavcodec/mdct_fixed.c | 44 |
4 files changed, 63 insertions, 3 deletions
diff --git a/libavcodec/fft-internal.h b/libavcodec/fft-internal.h index 1f240dbd18..d30571b733 100644 --- a/libavcodec/fft-internal.h +++ b/libavcodec/fft-internal.h @@ -39,6 +39,8 @@ #include "libavutil/intmath.h" #include "mathops.h" +void ff_mdct_calcw_c(FFTContext *s, FFTDouble *output, const FFTSample *input); + #define SCALE_FLOAT(a, bits) lrint((a) * (double)(1 << (bits))) #define FIX15(a) av_clip(SCALE_FLOAT(a, 15), -32767, 32767) @@ -49,11 +51,17 @@ y = (a + b) >> 1; \ } while (0) -#define CMUL(dre, dim, are, aim, bre, bim) do { \ - (dre) = (MUL16(are, bre) - MUL16(aim, bim)) >> 15; \ - (dim) = (MUL16(are, bim) + MUL16(aim, bre)) >> 15; \ +#define CMULS(dre, dim, are, aim, bre, bim, sh) do { \ + (dre) = (MUL16(are, bre) - MUL16(aim, bim)) >> sh; \ + (dim) = (MUL16(are, bim) + MUL16(aim, bre)) >> sh; \ } while (0) +#define CMUL(dre, dim, are, aim, bre, bim) \ + CMULS(dre, dim, are, aim, bre, bim, 15) + +#define CMULL(dre, dim, are, aim, bre, bim) \ + CMULS(dre, dim, are, aim, bre, bim, 0) + #endif /* CONFIG_FFT_FLOAT */ #define ff_imdct_calc_c FFT_NAME(ff_imdct_calc_c) diff --git a/libavcodec/fft.c b/libavcodec/fft.c index ff5d34baf3..19da4830a9 100644 --- a/libavcodec/fft.c +++ b/libavcodec/fft.c @@ -123,6 +123,9 @@ av_cold int ff_fft_init(FFTContext *s, int nbits, int inverse) if (ARCH_ARM) ff_fft_init_arm(s); if (HAVE_ALTIVEC) ff_fft_init_altivec(s); if (HAVE_MMX) ff_fft_init_mmx(s); + if (CONFIG_MDCT) s->mdct_calcw = s->mdct_calc; +#else + if (CONFIG_MDCT) s->mdct_calcw = ff_mdct_calcw_c; #endif for(j=4; j<=nbits; j++) { diff --git a/libavcodec/fft.h b/libavcodec/fft.h index 2f13e5fdb3..0f324cf5a2 100644 --- a/libavcodec/fft.h +++ b/libavcodec/fft.h @@ -53,6 +53,10 @@ typedef struct FFTContext FFTContext; #endif /* CONFIG_FFT_FLOAT */ +typedef struct FFTDComplex { + FFTDouble re, im; +} FFTDComplex; + /* FFT computation */ struct FFTContext { @@ -77,6 +81,7 @@ struct FFTContext { void (*imdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); void (*imdct_half)(struct FFTContext *s, FFTSample *output, const FFTSample *input); void (*mdct_calc)(struct FFTContext *s, FFTSample *output, const FFTSample *input); + void (*mdct_calcw)(struct FFTContext *s, FFTDouble *output, const FFTSample *input); int fft_permutation; #define FF_FFT_PERM_DEFAULT 0 #define FF_FFT_PERM_SWAP_LSBS 1 diff --git a/libavcodec/mdct_fixed.c b/libavcodec/mdct_fixed.c index 19c8039e04..94527f9e85 100644 --- a/libavcodec/mdct_fixed.c +++ b/libavcodec/mdct_fixed.c @@ -18,3 +18,47 @@ #define CONFIG_FFT_FLOAT 0 #include "mdct.c" + +/* same as ff_mdct_calcw_c with double-width unscaled output */ +void ff_mdct_calcw_c(FFTContext *s, FFTDouble *out, const FFTSample *input) +{ + int i, j, n, n8, n4, n2, n3; + FFTDouble re, im; + const uint16_t *revtab = s->revtab; + const FFTSample *tcos = s->tcos; + const FFTSample *tsin = s->tsin; + FFTComplex *x = s->tmp_buf; + FFTDComplex *o = (FFTDComplex *)out; + + n = 1 << s->mdct_bits; + n2 = n >> 1; + n4 = n >> 2; + n8 = n >> 3; + n3 = 3 * n4; + + /* pre rotation */ + for(i=0;i<n8;i++) { + re = RSCALE(-input[2*i+n3] - input[n3-1-2*i]); + im = RSCALE(-input[n4+2*i] + input[n4-1-2*i]); + j = revtab[i]; + CMUL(x[j].re, x[j].im, re, im, -tcos[i], tsin[i]); + + re = RSCALE( input[2*i] - input[n2-1-2*i]); + im = RSCALE(-input[n2+2*i] - input[ n-1-2*i]); + j = revtab[n8 + i]; + CMUL(x[j].re, x[j].im, re, im, -tcos[n8 + i], tsin[n8 + i]); + } + + s->fft_calc(s, x); + + /* post rotation */ + for(i=0;i<n8;i++) { + FFTDouble r0, i0, r1, i1; + CMULL(i1, r0, x[n8-i-1].re, x[n8-i-1].im, -tsin[n8-i-1], -tcos[n8-i-1]); + CMULL(i0, r1, x[n8+i ].re, x[n8+i ].im, -tsin[n8+i ], -tcos[n8+i ]); + o[n8-i-1].re = r0; + o[n8-i-1].im = i0; + o[n8+i ].re = r1; + o[n8+i ].im = i1; + } +} |