diff options
author | Koen Vos <koen.vos@skype.net> | 2011-10-06 13:38:26 -0400 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2011-10-06 13:38:26 -0400 |
commit | 888756691836ca8ce419a870a768f910330fb9d1 (patch) | |
tree | 9cf2a47be95f56c4c71dc49cdf7da7fdd9cb6bb9 /silk | |
parent | 480ba7034810fec56625dcd9ceeeb01d75c8d755 (diff) | |
download | opus-888756691836ca8ce419a870a768f910330fb9d1.tar.gz |
SILK update
Simplifies mono/stereo switching in SILK
Fixes a quantization mismatch between encoder and decoder
Constrains the pitch lags in the same way in the encoder and decoder
Diffstat (limited to 'silk')
-rw-r--r-- | silk/decode_core.c | 4 | ||||
-rw-r--r-- | silk/decode_pitch.c | 2 | ||||
-rw-r--r-- | silk/enc_API.c | 68 | ||||
-rw-r--r-- | silk/float/pitch_analysis_core_FLP.c | 6 | ||||
-rw-r--r-- | silk/main.h | 1 | ||||
-rw-r--r-- | silk/pitch_analysis_core.c | 4 | ||||
-rw-r--r-- | silk/stereo_LR_to_MS.c | 11 |
7 files changed, 30 insertions, 66 deletions
diff --git a/silk/decode_core.c b/silk/decode_core.c index 6a07ee1a..b80a3895 100644 --- a/silk/decode_core.c +++ b/silk/decode_core.c @@ -91,10 +91,10 @@ void silk_decode_core( /* Preload LPC coeficients to array on stack. Gives small performance gain */ silk_memcpy( A_Q12_tmp, A_Q12, psDec->LPC_order * sizeof( opus_int16 ) ); B_Q14 = &psDecCtrl->LTPCoef_Q14[ k * LTP_ORDER ]; - Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 ); signalType = psDec->indices.signalType; - inv_gain_Q16 = silk_INVERSE32_varQ( Gain_Q10, 26 ); + Gain_Q10 = silk_RSHIFT( psDecCtrl->Gains_Q16[ k ], 6 ); + inv_gain_Q16 = silk_INVERSE32_varQ( psDecCtrl->Gains_Q16[ k ], 32 ); inv_gain_Q16 = silk_min( inv_gain_Q16, silk_int16_MAX ); /* Calculate Gain adjustment factor */ diff --git a/silk/decode_pitch.c b/silk/decode_pitch.c index 6aeb7ab2..bb21bc75 100644 --- a/silk/decode_pitch.c +++ b/silk/decode_pitch.c @@ -67,7 +67,7 @@ void silk_decode_pitch( } min_lag = silk_SMULBB( PE_MIN_LAG_MS, Fs_kHz ); - max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ); + max_lag = silk_SMULBB( PE_MAX_LAG_MS, Fs_kHz ) - 1; lag = min_lag + lagIndex; for( k = 0; k < nb_subfr; k++ ) { diff --git a/silk/enc_API.c b/silk/enc_API.c index 3fa2715d..0fe945b6 100644 --- a/silk/enc_API.c +++ b/silk/enc_API.c @@ -119,44 +119,6 @@ opus_int silk_QueryEncoder( return ret; } -static void stereo_crossmix(const opus_int16 *in, opus_int16 *out, int channel, int len, int to_mono, int id) -{ - int i; - opus_int16 delta, g1, g2; - const opus_int16 *x1, *x2; - - x1 = in+channel; - x2 = in+(1-channel); - g1 = to_mono ? 16384: 8192; - g2 = to_mono ? 0 : 8192; - - /* We want to finish at 0.5 */ - delta = (16384+(len>>1))/(len); - if (to_mono) { - delta = -delta; - } - - i=0; - if (to_mono != 2) - { - if ( id==0 ) { - for ( ; i < len>>1; i++ ) { - out[ i ] = silk_RSHIFT_ROUND( silk_SMLABB( silk_SMULBB( x1[ 2*i ], g1 ), x2[ 2*i ], g2 ), 14 ); - g1 += delta; - g2 -= delta; - } - } - } - if (to_mono) { - for ( ; i < len; i++ ) { - out[ i ] = silk_RSHIFT( (opus_int32)x1[ 2*i ] + (opus_int32)x2[ 2*i ], 1 ); - } - } else { - for ( ; i < len; i++ ) { - out[ i ] = x1[ 2*i ]; - } - } -} /**************************/ /* Encode frame with Silk */ @@ -268,18 +230,13 @@ opus_int silk_Encode( /* Resample and write to buffer */ if( encControl->nChannelsAPI == 2 && encControl->nChannelsInternal == 2 ) { int id = psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded; - if ( encControl->toMono > 0) { - stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, encControl->toMono, id ); - } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1 ) { - stereo_crossmix( samplesIn, buf, 0, nSamplesFromInput, 0, id ); - } else { - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n ]; - } + for( n = 0; n < nSamplesFromInput; n++ ) { + buf[ n ] = samplesIn[ 2 * n ]; } /* Making sure to start both resamplers from the same state when switching from mono to stereo */ - if(psEnc->nPrevChannelsInternal == 1 && id==0) + if(psEnc->nPrevChannelsInternal == 1 && id==0) { silk_memcpy(&psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, sizeof(psEnc->state_Fxx[ 1 ].sCmn.resampler_state)); + } ret += silk_resampler( &psEnc->state_Fxx[ 0 ].sCmn.resampler_state, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ psEnc->state_Fxx[ 0 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); @@ -287,14 +244,8 @@ opus_int silk_Encode( nSamplesToBuffer = psEnc->state_Fxx[ 1 ].sCmn.frame_length - psEnc->state_Fxx[ 1 ].sCmn.inputBufIx; nSamplesToBuffer = silk_min( nSamplesToBuffer, 10 * nBlocksOf10ms * psEnc->state_Fxx[ 1 ].sCmn.fs_kHz ); - if ( encControl->toMono > 0) { - stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, encControl->toMono, id ); - } else if( psEnc->nPrevChannelsInternal == 1 || encControl->toMono == -1) { - stereo_crossmix( samplesIn, buf, 1, nSamplesFromInput, 0, id ); - } else { - for( n = 0; n < nSamplesFromInput; n++ ) { - buf[ n ] = samplesIn[ 2 * n + 1 ]; - } + for( n = 0; n < nSamplesFromInput; n++ ) { + buf[ n ] = samplesIn[ 2 * n + 1 ]; } ret += silk_resampler( &psEnc->state_Fxx[ 1 ].sCmn.resampler_state, &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ psEnc->state_Fxx[ 1 ].sCmn.inputBufIx + 2 ], buf, nSamplesFromInput ); @@ -394,9 +345,9 @@ opus_int silk_Encode( if( encControl->nChannelsInternal == 2 ) { silk_stereo_LR_to_MS( &psEnc->sStereo, &psEnc->state_Fxx[ 0 ].sCmn.inputBuf[ 2 ], &psEnc->state_Fxx[ 1 ].sCmn.inputBuf[ 2 ], psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], &psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ], - MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, + MStargetRates_bps, TargetRate_bps, psEnc->state_Fxx[ 0 ].sCmn.speech_activity_Q8, encControl->toMono, psEnc->state_Fxx[ 0 ].sCmn.fs_kHz, psEnc->state_Fxx[ 0 ].sCmn.frame_length ); - if (!prefillFlag) { + if( !prefillFlag ) { silk_stereo_encode_pred( psRangeEnc, psEnc->sStereo.predIx[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); silk_stereo_encode_mid_only( psRangeEnc, psEnc->sStereo.mid_only_flags[ psEnc->state_Fxx[ 0 ].sCmn.nFramesEncoded ] ); } @@ -437,8 +388,9 @@ opus_int silk_Encode( flags = silk_LSHIFT( flags, 1 ); flags |= psEnc->state_Fxx[ n ].sCmn.LBRR_flag; } - if (!prefillFlag) + if( !prefillFlag ) { ec_enc_patch_initial_bits( psRangeEnc, flags, ( psEnc->state_Fxx[ 0 ].sCmn.nFramesPerPacket + 1 ) * encControl->nChannelsInternal ); + } /* Return zero bytes if all channels DTXed */ if( psEnc->state_Fxx[ 0 ].sCmn.inDTX && ( encControl->nChannelsInternal == 1 || psEnc->state_Fxx[ 1 ].sCmn.inDTX ) ) { diff --git a/silk/float/pitch_analysis_core_FLP.c b/silk/float/pitch_analysis_core_FLP.c index f2615b5d..c9495915 100644 --- a/silk/float/pitch_analysis_core_FLP.c +++ b/silk/float/pitch_analysis_core_FLP.c @@ -467,17 +467,19 @@ opus_int silk_pitch_analysis_core_FLP( /* O voicing estimate: 0 voiced, 1 unvoic for( k = 0; k < nb_subfr; k++ ) { pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag ); } *lagIndex = (opus_int16)( lag_new - min_lag ); *contourIndex = (opus_int8)CBimax; - } else { + } else { /* Fs_kHz == 8 */ /* Save Lags and correlation */ silk_assert( CCmax >= 0.0f ); *LTPCorr = (silk_float)sqrt( CCmax / nb_subfr ); /* Output normalized correlation */ for( k = 0; k < nb_subfr; k++ ) { pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz ); } - *lagIndex = (opus_int16)( lag - min_lag ); + *lagIndex = (opus_int16)( lag - min_lag_8kHz ); *contourIndex = (opus_int8)CBimax; } silk_assert( *lagIndex >= 0 ); diff --git a/silk/main.h b/silk/main.h index c30c63ff..d7ed22ce 100644 --- a/silk/main.h +++ b/silk/main.h @@ -54,6 +54,7 @@ void silk_stereo_LR_to_MS( opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ opus_int32 total_rate_bps, /* I Total bitrate */ opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ + opus_int toMono, /* I Last frame before a stereo->mono transition */ opus_int fs_kHz, /* I Sample rate (kHz) */ opus_int frame_length /* I Number of samples */ ); diff --git a/silk/pitch_analysis_core.c b/silk/pitch_analysis_core.c index 70438474..eaa6209d 100644 --- a/silk/pitch_analysis_core.c +++ b/silk/pitch_analysis_core.c @@ -558,15 +558,17 @@ opus_int silk_pitch_analysis_core( /* O Voicing estimate: 0 voiced, 1 for( k = 0; k < nb_subfr; k++ ) { pitch_out[ k ] = lag_new + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag, max_lag ); } *lagIndex = (opus_int16)( lag_new - min_lag); *contourIndex = (opus_int8)CBimax; - } else { + } else { /* Fs_kHz == 8 */ /* Save Lags and correlation */ CCmax = silk_max( CCmax, 0 ); *LTPCorr_Q15 = (opus_int)silk_SQRT_APPROX( silk_LSHIFT( CCmax, 13 ) ); /* Output normalized correlation */ for( k = 0; k < nb_subfr; k++ ) { pitch_out[ k ] = lag + matrix_ptr( Lag_CB_ptr, k, CBimax, cbk_size ); + pitch_out[ k ] = silk_LIMIT( pitch_out[ k ], min_lag_8kHz, max_lag_8kHz ); } *lagIndex = (opus_int16)( lag - min_lag_8kHz ); *contourIndex = (opus_int8)CBimax; diff --git a/silk/stereo_LR_to_MS.c b/silk/stereo_LR_to_MS.c index 5f3eee57..9db12444 100644 --- a/silk/stereo_LR_to_MS.c +++ b/silk/stereo_LR_to_MS.c @@ -41,6 +41,7 @@ void silk_stereo_LR_to_MS( opus_int32 mid_side_rates_bps[], /* O Bitrates for mid and side signals */ opus_int32 total_rate_bps, /* I Total bitrate */ opus_int prev_speech_act_Q8, /* I Speech activity level in previous frame */ + opus_int toMono, /* I Last frame before a stereo->mono transition */ opus_int fs_kHz, /* I Sample rate (kHz) */ opus_int frame_length /* I Number of samples */ ) @@ -96,7 +97,7 @@ void silk_stereo_LR_to_MS( /* Determine bitrate distribution between mid and side, and possibly reduce stereo width */ total_rate_bps -= is10msFrame ? 1200 : 600; /* Subtract approximate bitrate for coding stereo parameters */ - if (total_rate_bps < 1 ) { + if( total_rate_bps < 1 ) { total_rate_bps = 1; } min_mid_rate_bps = silk_SMLABB( 2000, fs_kHz, 900 ); @@ -122,7 +123,13 @@ void silk_stereo_LR_to_MS( /* At very low bitrates or for inputs that are nearly amplitude panned, switch to panned-mono coding */ *mid_only_flag = 0; - if( state->width_prev_Q14 == 0 && + if( toMono ) { + /* Last frame before stereo->mono transition; collapse stereo width */ + width_Q14 = 0; + pred_Q13[ 0 ] = 0; + pred_Q13[ 1 ] = 0; + silk_stereo_quant_pred( pred_Q13, ix ); + } else if( state->width_prev_Q14 == 0 && ( 8 * total_rate_bps < 13 * min_mid_rate_bps || silk_SMULWB( frac_Q16, state->smth_width_Q14 ) < SILK_FIX_CONST( 0.05, 14 ) ) ) { /* Code as panned-mono; previous frame already had zero width */ |