summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Zheng Lv <lvzheng@google.com> 2022-09-13 14:40:52 +0800
committer: Felicia Lim <flim@google.com> 2023-01-17 20:10:28 -0800
commit: 8cf872a186b96085b1bb3a547afd598354ebeb87 (patch)
tree: 23264149c9493d73559addf465b2e972d279414b
parent: 757c53f775a0b651b0512a1992d67f4b2159a378 (diff)
download: opus-8cf872a186b96085b1bb3a547afd598354ebeb87.tar.gz
Make CELT FFT twiddle complex type aligned
This makes kiss_twiddle_cpx 4-byte aligned (instead of 2-byte aligned) for fixed-point builds. In tests on an armv6j+nofp development board, CELT encoding becomes 1.4x as fast and decoding over 2x as fast. The performance gain is mostly attributed to the proper alignment of the static const array mdct_twiddles960.

Co-authored-by: David Gao <davidgao@google.com>
Signed-off-by: Felicia Lim <flim@google.com>
-rw-r--r-- celt/kiss_fft.h 12
1 files changed, 11 insertions, 1 deletions
diff --git a/celt/kiss_fft.h b/celt/kiss_fft.h
index bffa2bfa..267f72f9 100644
--- a/celt/kiss_fft.h
+++ b/celt/kiss_fft.h
@@ -52,6 +52,10 @@ extern "C" {
# define kiss_fft_scalar opus_int32
# define kiss_twiddle_scalar opus_int16
+/* Some 32-bit CPUs would load/store a kiss_twiddle_cpx with a single memory
+ * access, and could benefit from additional alignment.
+ */
+# define KISS_TWIDDLE_CPX_ALIGNMENT (sizeof(opus_int32))
#else
# ifndef kiss_fft_scalar
@@ -62,6 +66,12 @@ extern "C" {
# endif
#endif
+#if defined(__GNUC__) && defined(KISS_TWIDDLE_CPX_ALIGNMENT)
+#define KISS_TWIDDLE_CPX_ALIGNED __attribute__((aligned(KISS_TWIDDLE_CPX_ALIGNMENT)))
+#else
+#define KISS_TWIDDLE_CPX_ALIGNED
+#endif
+
typedef struct {
kiss_fft_scalar r;
kiss_fft_scalar i;
@@ -70,7 +80,7 @@ typedef struct {
typedef struct {
kiss_twiddle_scalar r;
kiss_twiddle_scalar i;
-}kiss_twiddle_cpx;
+} KISS_TWIDDLE_CPX_ALIGNED kiss_twiddle_cpx;
#define MAXFACTORS 8
/* e.g. an fft of length 128 has 4 factors