diff options
author | xiangmingzhu <xiangzhu@cisco.com> | 2014-04-30 15:48:07 +0800 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2014-10-03 21:16:00 -0400 |
commit | c95c9a048f3283afb2e412b10085d4f7c19e1412 (patch) | |
tree | ed8873af6559d7a98922e0fed85be47c826ef521 /silk/float | |
parent | 80460334b77d70e665a390503cd8992cdad06c10 (diff) | |
download | opus-c95c9a048f3283afb2e412b10085d4f7c19e1412.tar.gz |
Cisco optimization for x86 & fixed point
1. Only for fixed point on x86 platform (32bit and 64bit, uses SIMD
intrinsics up to SSE4.2)
2. Use "configure --enable-fixed-point --enable-intrinsics" to enable
optimization, default is disabled.
3. Official test cases are verified and passed.
Signed-off-by: Timothy B. Terriberry <tterribe@xiph.org>
Diffstat (limited to 'silk/float')
-rw-r--r-- | silk/float/encode_frame_FLP.c | 2 | ||||
-rw-r--r-- | silk/float/find_pred_coefs_FLP.c | 3 | ||||
-rw-r--r-- | silk/float/main_FLP.h | 3 | ||||
-rw-r--r-- | silk/float/wrappers_FLP.c | 9 |
4 files changed, 10 insertions, 7 deletions
diff --git a/silk/float/encode_frame_FLP.c b/silk/float/encode_frame_FLP.c index d54e2686..2092a4d9 100644 --- a/silk/float/encode_frame_FLP.c +++ b/silk/float/encode_frame_FLP.c @@ -47,7 +47,7 @@ void silk_encode_do_VAD_FLP( /****************************/ /* Voice Activity Detection */ /****************************/ - silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1 ); + silk_VAD_GetSA_Q8( &psEnc->sCmn, psEnc->sCmn.inputBuf + 1, psEnc->sCmn.arch ); /**************************************************/ /* Convert speech activity into VAD and DTX flags */ diff --git a/silk/float/find_pred_coefs_FLP.c b/silk/float/find_pred_coefs_FLP.c index ea2c6c43..1d2dd72c 100644 --- a/silk/float/find_pred_coefs_FLP.c +++ b/silk/float/find_pred_coefs_FLP.c @@ -67,7 +67,8 @@ void silk_find_pred_coefs_FLP( /* Quantize LTP gain parameters */ silk_quant_LTP_gains_FLP( psEncCtrl->LTPCoef, psEnc->sCmn.indices.LTPIndex, &psEnc->sCmn.indices.PERIndex, - &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr ); + &psEnc->sCmn.sum_log_gain_Q7, WLTP, psEnc->sCmn.mu_LTP_Q9, psEnc->sCmn.LTPQuantLowComplexity, psEnc->sCmn.nb_subfr, + psEnc->sCmn.arch ); /* Control LTP scaling */ silk_LTP_scale_ctrl_FLP( psEnc, psEncCtrl, condCoding ); diff --git a/silk/float/main_FLP.h b/silk/float/main_FLP.h index fb553b61..e5a75972 100644 --- a/silk/float/main_FLP.h +++ b/silk/float/main_FLP.h @@ -205,7 +205,8 @@ void silk_quant_LTP_gains_FLP( const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ); /* Residual energy: nrg = wxx - 2 * wXx * c + c' * wXX * c */ diff --git a/silk/float/wrappers_FLP.c b/silk/float/wrappers_FLP.c index 350599b2..6666b8ef 100644 --- a/silk/float/wrappers_FLP.c +++ b/silk/float/wrappers_FLP.c @@ -161,10 +161,10 @@ void silk_NSQ_wrapper_FLP( /* Call NSQ */ if( psEnc->sCmn.nStatesDelayedDecision > 1 || psEnc->sCmn.warping_Q16 > 0 ) { silk_NSQ_del_dec( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } else { silk_NSQ( &psEnc->sCmn, psNSQ, psIndices, x_Q3, pulses, PredCoef_Q12[ 0 ], LTPCoef_Q14, - AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14 ); + AR2_Q13, HarmShapeGain_Q14, Tilt_Q14, LF_shp_Q14, Gains_Q16, psEncCtrl->pitchL, Lambda_Q10, LTP_scale_Q14, psEnc->sCmn.arch ); } } @@ -179,7 +179,8 @@ void silk_quant_LTP_gains_FLP( const silk_float W[ MAX_NB_SUBFR * LTP_ORDER * LTP_ORDER ], /* I Error weights */ const opus_int mu_Q10, /* I Mu value (R/D tradeoff) */ const opus_int lowComplexity, /* I Flag for low complexity */ - const opus_int nb_subfr /* I number of subframes */ + const opus_int nb_subfr, /* I number of subframes */ + int arch /* I Run-time architecture */ ) { opus_int i; @@ -193,7 +194,7 @@ void silk_quant_LTP_gains_FLP( W_Q18[ i ] = (opus_int32)silk_float2int( W[ i ] * 262144.0f ); } - silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr ); + silk_quant_LTP_gains( B_Q14, cbk_index, periodicity_index, sum_log_gain_Q7, W_Q18, mu_Q10, lowComplexity, nb_subfr, arch ); for( i = 0; i < nb_subfr * LTP_ORDER; i++ ) { B[ i ] = (silk_float)B_Q14[ i ] * ( 1.0f / 16384.0f ); |