diff options
author | Viswanath Puttagunta <viswanath.puttagunta@linaro.org> | 2015-05-15 12:42:19 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2015-10-07 18:09:20 -0400 |
commit | f48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch) | |
tree | a231446a0bc212ae891524881580f9b5ffd90798 /celt/kiss_fft.h | |
parent | 0fe514352568530d4bd18a6686e6878417e6cf41 (diff) | |
download | opus-f48abe8308ba7a67e443ad0911e06d62fd47ba91.tar.gz |
armv7(float): Optimize encode usecase using NE10 library
Optimize opus encode (float only) usecase using ARM NE10
library. Mainly effects opus_fft and ctl_mdct_forward
and related functions.
This optimization can be used for ARM CPUs that have NEON
VFP unit. This patch only enables optimizations for ARMv7.
Official ARM NE10 library page available at
http://projectne10.github.io/Ne10/
To enable this optimization, use
--enable-intrinsics --with-NE10=<install_prefix>
or
--enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir>
Compile time checks made during configure process to make sure
optimization option available only when compiler supports NEON
instrinsics.
Runtime checks made to make sure optimized functions only called
on appropriate hardware.
Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'celt/kiss_fft.h')
-rw-r--r-- | celt/kiss_fft.h | 57 |
1 files changed, 53 insertions, 4 deletions
diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h index 390b54d9..99f0e500 100644 --- a/celt/kiss_fft.h +++ b/celt/kiss_fft.h @@ -32,6 +32,7 @@ #include <stdlib.h> #include <math.h> #include "arch.h" +#include "cpu_support.h" #ifdef __cplusplus extern "C" { @@ -77,6 +78,11 @@ typedef struct { 4*4*4*2 */ +typedef struct arch_fft_state{ + int is_supported; + void *priv; +} arch_fft_state; + typedef struct kiss_fft_state{ int nfft; opus_val16 scale; @@ -87,8 +93,15 @@ typedef struct kiss_fft_state{ opus_int16 factors[2*MAXFACTORS]; const opus_int16 *bitrev; const kiss_twiddle_cpx *twiddles; +#ifndef FIXED_POINT + arch_fft_state *arch_fft; +#endif } kiss_fft_state; +#if !defined(FIXED_POINT) && defined(HAVE_ARM_NE10) +#include "arm/fft_arm.h" +#endif + /*typedef struct kiss_fft_state* kiss_fft_cfg;*/ /** @@ -114,9 +127,9 @@ typedef struct kiss_fft_state{ * buffer size in *lenmem. * */ -kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base); +kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem, const kiss_fft_state *base, int arch); -kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); +kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch); /** * opus_fft(cfg,in_out_buf) @@ -128,13 +141,49 @@ kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem); * Note that each element is complex and can be accessed like f[k].r and f[k].i * */ -void opus_fft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); +void opus_fft_c(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); void opus_ifft(const kiss_fft_state *cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout); void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); void opus_ifft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout); -void opus_fft_free(const kiss_fft_state *cfg); +void opus_fft_free(const kiss_fft_state *cfg, int arch); + + +void opus_fft_free_arch_c(kiss_fft_state *st); +int opus_fft_alloc_arch_c(kiss_fft_state *st); + +#if !defined(OVERRIDE_OPUS_FFT) +/* Is run-time CPU detection enabled on this platform? */ +#if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) + +extern int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); + +#define opus_fft_alloc_arch(_st, arch) \ + ((*OPUS_FFT_ALLOC_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])( + kiss_fft_state *st); +#define opus_fft_free_arch(_st, arch) \ + ((*OPUS_FFT_FREE_ARCH_IMPL[(arch)&OPUS_ARCHMASK])(_st)) + +extern void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg, + const kiss_fft_cpx *fin, kiss_fft_cpx *fout); +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((*OPUS_FFT[(arch)&OPUS_ARCHMASK])(_cfg, _fin, _fout)) +#else /* else for if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ + +#define opus_fft_alloc_arch(_st, arch) \ + ((void)(arch), opus_fft_alloc_arch_c(_st)) + +#define opus_fft_free_arch(_st, arch) \ + ((void)(arch), opus_fft_free_arch_c(_st)) + +#define opus_fft(_cfg, _fin, _fout, arch) \ + ((void)(arch), opus_fft_c(_cfg, _fin, _fout)) +#endif /* end if defined(OPUS_HAVE_RTCD) && (defined(HAVE_ARM_NE10)) */ +#endif /* end if !defined(OVERRIDE_OPUS_FFT) */ #ifdef __cplusplus } |