diff options
author | Zheng Lv <lvzheng@google.com> | 2022-09-13 14:40:52 +0800 |
---|---|---|
committer | Felicia Lim <flim@google.com> | 2023-01-17 20:10:28 -0800 |
commit | 8cf872a186b96085b1bb3a547afd598354ebeb87 (patch) | |
tree | 23264149c9493d73559addf465b2e972d279414b | |
parent | 757c53f775a0b651b0512a1992d67f4b2159a378 (diff) | |
download | opus-8cf872a186b96085b1bb3a547afd598354ebeb87.tar.gz |
Make CELT FFT twiddle complex type aligned
This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte aligned) for
fixed-point builds. In tests on an armv6j+nofp development board, CELT
encoding became 1.4x as fast and decoding more than 2x as fast.
The performance gain is mostly attributable to the proper alignment of the
static const array mdct_twiddles960.
Co-authored-by: David Gao <davidgao@google.com>
Signed-off-by: Felicia Lim <flim@google.com>
-rw-r--r-- | celt/kiss_fft.h | 12 |
1 file changed, 11 insertions, 1 deletion
```diff
diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
index bffa2bfa..267f72f9 100644
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -52,6 +52,10 @@ extern "C" {
 
 # define kiss_fft_scalar opus_int32
 # define kiss_twiddle_scalar opus_int16
+/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
+ * access, and could benefit from additional alignment.
+ */
+# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
 
 #else
 # ifndef kiss_fft_scalar
@@ -62,6 +66,12 @@ extern "C" {
 # endif
 #endif
 
+#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
+#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
+#else
+#define KISS_TWIDDLE_CPX_ALIGNED
+#endif
+
 typedef struct {
     kiss_fft_scalar r;
     kiss_fft_scalar i;
@@ -70,7 +80,7 @@ typedef struct {
 typedef struct {
     kiss_twiddle_scalar r;
     kiss_twiddle_scalar i;
-}kiss_twiddle_cpx;
+} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
 
 #define MAXFACTORS 8
 /* e.g. an fft of length 128 has 4 factors
```