armv7(float): Optimize encode usecase using NE10 library

Optimize opus encode (float only) usecase using ARM NE10 library. Mainly effects opus_fft and ctl_mdct_forward and related functions. This optimization can be used for ARM CPUs that have NEON VFP unit. This patch only enables optimizations for ARMv7. Official ARM NE10 library page available at http://projectne10.github.io/Ne10/ To enable this optimization, use --enable-intrinsics --with-NE10=<install_prefix> or --enable-intrinsics --with-NE10-libraries=<NE10_lib_dir> --with-NE10-includes=<NE10_includes_dir> Compile time checks made during configure process to make sure optimization option available only when compiler supports NEON instrinsics. Runtime checks made to make sure optimized functions only called on appropriate hardware. Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
author: Viswanath Puttagunta <viswanath.puttagunta@linaro.org> 2015-05-15 12:42:19 -0500
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2015-10-07 18:09:20 -0400
commit: f48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch)
tree: a231446a0bc212ae891524881580f9b5ffd90798 /src
parent: 0fe514352568530d4bd18a6686e6878417e6cf41 (diff)
download: opus-f48abe8308ba7a67e443ad0911e06d62fd47ba91.tar.gz
4 files changed, 12 insertions, 9 deletions
diff --git a/src/analysis.c b/src/analysis.c
index 401a43e4..322e53c4 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -187,7 +187,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    info_out->music_prob = psum;
 }
 
-static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix)
+static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt_mode, const void *x, int len, int offset, int c1, int c2, int C, int lsb_depth, downmix_func downmix, int arch)
 {
     int i, b;
     const kiss_fft_state *kfft;
@@ -260,7 +260,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill);
     downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C);
     tonal->mem_fill = 240 + remaining;
-    opus_fft(kfft, in, out);
+    opus_fft(kfft, in, out, arch);
 #ifndef FIXED_POINT
     /* If there's any NaN on the input, the entire output will be NaN, so we only need to check one value. */
     if (celt_isnan(out[0].r))
@@ -633,7 +633,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
 
 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
                  int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
-                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info)
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch)
 {
    int offset;
    int pcm_len;
@@ -646,7 +646,7 @@ void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, co
       pcm_len = analysis_frame_size - analysis->analysis_offset;
       offset = analysis->analysis_offset;
       do {
-         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix);
+         tonality_analysis(analysis, celt_mode, analysis_pcm, IMIN(480, pcm_len), offset, c1, c2, C, lsb_depth, downmix, arch);
          offset += 480;
          pcm_len -= 480;
       } while (pcm_len>0);
diff --git a/src/analysis.h b/src/analysis.h
index 85a73d75..9c328e8b 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -82,6 +82,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
 
 void run_analysis(TonalityAnalysisState *analysis, const CELTMode *celt_mode, const void *analysis_pcm,
                  int analysis_frame_size, int frame_size, int c1, int c2, int C, opus_int32 Fs,
-                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info);
+                 int lsb_depth, downmix_func downmix, AnalysisInfo *analysis_info, int arch);
 
 #endif
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index d11e972f..9dbe4bf5 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1006,7 +1006,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
        analysis_read_subframe_bak = st->analysis.read_subframe;
        run_analysis(&st->analysis, celt_mode, analysis_pcm, analysis_size, frame_size,
              c1, c2, analysis_channels, st->Fs,
-             lsb_depth, downmix, &analysis_info);
+             lsb_depth, downmix, &analysis_info, st->arch);
     }
 #else
     (void)analysis_pcm;
diff --git a/src/opus_multistream_encoder.c b/src/opus_multistream_encoder.c
index aa6a2672..9e857735 100644
--- a/src/opus_multistream_encoder.c
+++ b/src/opus_multistream_encoder.c
@@ -72,6 +72,7 @@ typedef void (*opus_copy_channel_in_func)(
 
 struct OpusMSEncoder {
    ChannelLayout layout;
+   int arch;
    int lfe_stream;
    int application;
    int variable_duration;
@@ -221,7 +222,7 @@ opus_val16 logSum(opus_val16 a, opus_val16 b)
 #endif
 
 void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *bandLogE, opus_val32 *mem, opus_val32 *preemph_mem,
-      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in
+      int len, int overlap, int channels, int rate, opus_copy_channel_in_func copy_channel_in, int arch
 )
 {
    int c;
@@ -273,7 +274,8 @@ void surround_analysis(const CELTMode *celt_mode, const void *pcm, opus_val16 *b
          }
       }
 #endif
-      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window, overlap, celt_mode->maxLM-LM, 1);
+      clt_mdct_forward(&celt_mode->mdct, in, freq, celt_mode->window,
+            overlap, celt_mode->maxLM-LM, 1, arch);
       if (upsample != 1)
       {
          int bound = len;
@@ -427,6 +429,7 @@ static int opus_multistream_encoder_init_impl(
        (streams<1) || (coupled_streams<0) || (streams>255-coupled_streams))
       return OPUS_BAD_ARG;
 
+   st->arch = opus_select_arch();
    st->layout.nb_channels = channels;
    st->layout.nb_streams = streams;
    st->layout.nb_coupled_streams = coupled_streams;
@@ -783,7 +786,7 @@ static int opus_multistream_encode_native
    ALLOC(bandSMR, 21*st->layout.nb_channels, opus_val16);
    if (st->surround)
    {
-      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in);
+      surround_analysis(celt_mode, pcm, bandSMR, mem, preemph_mem, frame_size, 120, st->layout.nb_channels, Fs, copy_channel_in, st->arch);
    }
 
    /* Compute bitrate allocation between streams (this could be a lot better) */
author	Viswanath Puttagunta <viswanath.puttagunta@linaro.org>	2015-05-15 12:42:19 -0500
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2015-10-07 18:09:20 -0400
commit	f48abe8308ba7a67e443ad0911e06d62fd47ba91 (patch)
tree	a231446a0bc212ae891524881580f9b5ffd90798 /src
parent	0fe514352568530d4bd18a6686e6878417e6cf41 (diff)
download	opus-f48abe8308ba7a67e443ad0911e06d62fd47ba91.tar.gz