summaryrefslogtreecommitdiff
path: root/celt/arm/arm_celt_map.c
diff options
context:
space:
mode:
authorViswanath Puttagunta <viswanath.puttagunta@linaro.org>2015-05-15 12:42:19 -0500
committerJean-Marc Valin <jmvalin@jmvalin.ca>2015-10-07 18:09:20 -0400
commitf48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch)
treea231446a0bc212ae891524881580f9b5ffd90798 /celt/arm/arm_celt_map.c
parent0fe514352568530d4bd18a6686e6878417e6cf41 (diff)
downloadopus-f48abe8308ba7a67e443ad0911e06d62fd47ba91.tar.gz
armv7(float): Optimize encode usecase using NE10 library
Optimize opus encode (float only) usecase using ARM NE10 library. Mainly effects opus_fft and ctl_mdct_forward and related functions. This optimization can be used for ARM CPUs that have NEON VFP unit. This patch only enables optimizations for ARMv7. Official ARM NE10 library page available at http://projectne10.github.io/Ne10/ To enable this optimization, use --enable-intrinsics --with-NE10=<install_prefix> or --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir> Compile time checks made during configure process to make sure optimization option available only when compiler supports NEON instrinsics. Runtime checks made to make sure optimized functions only called on appropriate hardware. Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'celt/arm/arm_celt_map.c')
-rw-r--r--celt/arm/arm_celt_map.c49
1 files changed, 45 insertions, 4 deletions
diff --git a/celt/arm/arm_celt_map.c b/celt/arm/arm_celt_map.c
index 68c224df..87ba3b3e 100644
--- a/celt/arm/arm_celt_map.c
+++ b/celt/arm/arm_celt_map.c
@@ -30,6 +30,8 @@
#endif
#include "pitch.h"
+#include "kiss_fft.h"
+#include "mdct.h"
#if defined(OPUS_HAVE_RTCD)
@@ -42,7 +44,7 @@ opus_val32 (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
MAY_HAVE_NEON(celt_pitch_xcorr) /* NEON */
};
# else /* !FIXED_POINT */
-# if defined(OPUS_ARM_NEON_INTR)
+# if defined(OPUS_ARM_MAY_HAVE_NEON_INTR)
void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int) = {
celt_pitch_xcorr_c, /* ARMv4 */
@@ -50,7 +52,46 @@ void (*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
celt_pitch_xcorr_c, /* Media */
celt_pitch_xcorr_float_neon /* Neon */
};
-# endif
-# endif
-#endif
+# if defined(HAVE_ARM_NE10)
+# if defined(CUSTOM_MODES)
+int (*const OPUS_FFT_ALLOC_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+ opus_fft_alloc_arch_c, /* ARMv4 */
+ opus_fft_alloc_arch_c, /* EDSP */
+ opus_fft_alloc_arch_c, /* Media */
+ opus_fft_alloc_arm_float_neon /* Neon with NE10 library support */
+};
+
+void (*const OPUS_FFT_FREE_ARCH_IMPL[OPUS_ARCHMASK+1])(kiss_fft_state *st) = {
+ opus_fft_free_arch_c, /* ARMv4 */
+ opus_fft_free_arch_c, /* EDSP */
+ opus_fft_free_arch_c, /* Media */
+ opus_fft_free_arm_float_neon /* Neon with NE10 */
+};
+# endif /* CUSTOM_MODES */
+
+void (*const OPUS_FFT[OPUS_ARCHMASK+1])(const kiss_fft_state *cfg,
+ const kiss_fft_cpx *fin,
+ kiss_fft_cpx *fout) = {
+ opus_fft_c, /* ARMv4 */
+ opus_fft_c, /* EDSP */
+ opus_fft_c, /* Media */
+ opus_fft_float_neon /* Neon with NE10 */
+};
+
+void (*const CLT_MDCT_FORWARD_IMPL[OPUS_ARCHMASK+1])(const mdct_lookup *l,
+ kiss_fft_scalar *in,
+ kiss_fft_scalar * OPUS_RESTRICT out,
+ const opus_val16 *window,
+ int overlap, int shift,
+ int stride, int arch) = {
+ clt_mdct_forward_c, /* ARMv4 */
+ clt_mdct_forward_c, /* EDSP */
+ clt_mdct_forward_c, /* Media */
+ clt_mdct_forward_float_neon /* Neon with NE10 */
+};
+# endif /* HAVE_ARM_NE10 */
+# endif /* OPUS_ARM_MAY_HAVE_NEON_INTR */
+# endif /* FIXED_POINT */
+
+#endif /* OPUS_HAVE_RTCD */