From eae137c6952adae3c92d5dfe8de273e9557e3f12 Mon Sep 17 00:00:00 2001
From: Koen Vos
Date: Wed, 13 Jan 2016 11:54:40 +0800
Subject: faster Burg implementation

---
 silk/fixed/burg_modified_FIX.c | 214 ++++++++++++++++++++---------------------
 silk/float/burg_modified_FLP.c | 162 +++++++++++++++----------------
 src/opus_demo.c                |   7 +-
 3 files changed, 187 insertions(+), 196 deletions(-)

diff --git a/silk/fixed/burg_modified_FIX.c b/silk/fixed/burg_modified_FIX.c
index be79e11d..1b6373e9 100644
--- a/silk/fixed/burg_modified_FIX.c
+++ b/silk/fixed/burg_modified_FIX.c
@@ -34,7 +34,7 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tuning_parameters.h"
 #include "pitch.h"
 
-#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384 */
+/* This code implements the method from https://www.opus-codec.org/docs/vos_fastburg.pdf */
 
 #define QA                          25
 #define N_BITS_HEAD_ROOM            2
@@ -54,45 +54,44 @@ void silk_burg_modified_c(
     int             arch                /* I    Run-time architecture                                       */
 )
 {
-    opus_int         k, n, s, lz, rshifts, reached_max_gain;
+    opus_int         k, n, s, lz, rshifts, rshifts_extra, reached_max_gain;
     opus_int32       C0, num, nrg, rc_Q31, invGain_Q30, Atmp_QA, Atmp1, tmp1, tmp2, x1, x2;
     const opus_int16 *x_ptr;
-    opus_int32       C_first_row[ SILK_MAX_ORDER_LPC ];
-    opus_int32       C_last_row[  SILK_MAX_ORDER_LPC ];
-    opus_int32       Af_QA[       SILK_MAX_ORDER_LPC ];
-    opus_int32       CAf[ SILK_MAX_ORDER_LPC + 1 ];
-    opus_int32       CAb[ SILK_MAX_ORDER_LPC + 1 ];
-    opus_int32       xcorr[ SILK_MAX_ORDER_LPC ];
-    opus_int64       C0_64;
-
-    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+    opus_int32       Af_QA[ SILK_MAX_ORDER_LPC ];
+    opus_int32       g[ SILK_MAX_ORDER_LPC + 1 ];
+    opus_int32       c[ SILK_MAX_ORDER_LPC + 1 ];
 
     /* Compute autocorrelations, added over subframes */
-    C0_64 = silk_inner_prod16_aligned_64( x, x, subfr_length*nb_subfr, arch );
-    lz = silk_CLZ64(C0_64);
-    rshifts = 32 + 1 + N_BITS_HEAD_ROOM - lz;
-    if (rshifts > MAX_RSHIFTS) rshifts = MAX_RSHIFTS;
-    if (rshifts < MIN_RSHIFTS) rshifts = MIN_RSHIFTS;
-
-    if (rshifts > 0) {
-        C0 = (opus_int32)silk_RSHIFT64(C0_64, rshifts );
+    silk_sum_sqr_shift( &C0, &rshifts, x, nb_subfr * subfr_length );
+    if( rshifts > MAX_RSHIFTS ) {
+        C0 = silk_LSHIFT32( C0, rshifts - MAX_RSHIFTS );
+        silk_assert( C0 > 0 );
+        rshifts = MAX_RSHIFTS;
     } else {
-        C0 = silk_LSHIFT32((opus_int32)C0_64, -rshifts );
+        lz = silk_CLZ32( C0 ) - 1;
+        rshifts_extra = N_BITS_HEAD_ROOM - lz;
+        if( rshifts_extra > 0 ) {
+            rshifts_extra = silk_min( rshifts_extra, MAX_RSHIFTS - rshifts );
+            C0 = silk_RSHIFT32( C0, rshifts_extra );
+        } else {
+            rshifts_extra = silk_max( rshifts_extra, MIN_RSHIFTS - rshifts );
+            C0 = silk_LSHIFT32( C0, -rshifts_extra );
+        }
+        rshifts += rshifts_extra;
     }
-
-    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;        /* Q(-rshifts) */
-    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
+    silk_memset( c, 0, (D+1) * sizeof( opus_int32 ) );
     if( rshifts > 0 ) {
         for( s = 0; s < nb_subfr; s++ ) {
             x_ptr = x + s * subfr_length;
             for( n = 1; n < D + 1; n++ ) {
-                C_first_row[ n - 1 ] += (opus_int32)silk_RSHIFT64(
+                c[ n ] += (opus_int32)silk_RSHIFT64(
                     silk_inner_prod16_aligned_64( x_ptr, x_ptr + n, subfr_length - n, arch ), rshifts );
             }
         }
     } else {
         for( s = 0; s < nb_subfr; s++ ) {
             int i;
+            opus_int32 xcorr[ SILK_MAX_ORDER_LPC ];
             opus_int32 d;
             x_ptr = x + s * subfr_length;
             celt_pitch_xcorr(x_ptr, x_ptr + 1, xcorr, subfr_length - D, D, arch );
@@ -102,23 +101,50 @@ void silk_burg_modified_c(
                 xcorr[ n - 1 ] += d;
             }
             for( n = 1; n < D + 1; n++ ) {
-                C_first_row[ n - 1 ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
+                c[ n ] += silk_LSHIFT32( xcorr[ n - 1 ], -rshifts );
             }
         }
     }
-    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( opus_int32 ) );
 
-    /* Initialize */
-    CAb[ 0 ] = CAf[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;        /* Q(-rshifts) */
+    /* Multiply all correlations by 2 */
+    rshifts++;
 
-    invGain_Q30 = (opus_int32)1 << 30;
+    /* Initialize */
+    c[ 0 ] = C0 + silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ) + 1;                     /* Q(-rshifts) */
+    g[ 0 ] = c[ 0 ];                                                                                 /* Q(-rshifts) */
+    tmp1 = 0;
+    if( rshifts > -N_BITS_HEAD_ROOM ) {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            x1 = x_ptr[ 0 ];
+            x2 = x_ptr[ subfr_length - 1 ];
+            tmp1 = silk_SMLAWB( tmp1, silk_LSHIFT32( x1, 16 - rshifts ), x1 );                       /* Q(-rshifts) */
+            tmp1 = silk_SMLAWB( tmp1, silk_LSHIFT32( x2, 16 - rshifts ), x2 );                       /* Q(-rshifts) */
+        }
+    } else {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            x1 = x_ptr[ 0 ];
+            x2 = x_ptr[ subfr_length - 1 ];
+            tmp1 = silk_MLA( tmp1, silk_LSHIFT32( x1, -rshifts ), x1 );                              /* Q(-rshifts) */
+            tmp1 = silk_MLA( tmp1, silk_LSHIFT32( x2, -rshifts ), x2 );                              /* Q(-rshifts) */
+        }
+    }
+    g[ 0 ] -= tmp1;
+    g[ 1 ] = c[ 1 ];
+    silk_assert( g[ 1 ] < g[ 0 ] && g[ 1 ] > -g[ 0 ] );
+    rc_Q31 = -silk_DIV32_varQ( g[ 1 ], g[ 0 ], 31 );
+    Af_QA[ 0 ] = silk_RSHIFT32( rc_Q31, 31 - QA );                                                   /* QA */
+    invGain_Q30 = SILK_FIX_CONST( 1, 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
     reached_max_gain = 0;
-    for( n = 0; n < D; n++ ) {
-        /* Update first row of correlation matrix (without first element) */
-        /* Update last row of correlation matrix (without last element, stored in reversed order) */
-        /* Update C * Af */
-        /* Update C * flipud(Af) (stored in reversed order) */
-        if( rshifts > -2 ) {
+    for( n = 1; n < D; n++ ) {
+        for( k = 0; k < (n >> 1) + 1; k++ ) {
+            tmp1 = g[ k ];
+            tmp2 = g[ n - k ];
+            g[ k ]     = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );                   /* Q(-rshifts) */
+            g[ n - k ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );                   /* Q(-rshifts) */
+        }
+        if( rshifts > -N_BITS_HEAD_ROOM ) {
             for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
                 x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], 16 - rshifts );                         /* Q(16-rshifts) */
@@ -126,29 +152,29 @@ void silk_burg_modified_c(
                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], QA - 16 );                             /* Q(QA-16) */
                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], QA - 16 );          /* Q(QA-16) */
                 for( k = 0; k < n; k++ ) {
-                    C_first_row[ k ] = silk_SMLAWB( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] );      /* Q( -rshifts ) */
-                    C_last_row[ k ]  = silk_SMLAWB( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    c[ k + 1 ] = silk_SMLAWB( silk_SMLAWB( c[ k + 1 ], x1, x_ptr[ n - k - 1 ] ),     /* Q( -rshifts ) */
+                                                                       x2, x_ptr[ subfr_length - n + k ] );
                     Atmp_QA = Af_QA[ k ];
                     tmp1 = silk_SMLAWB( tmp1, Atmp_QA, x_ptr[ n - k - 1 ] );                         /* Q(QA-16) */
                     tmp2 = silk_SMLAWB( tmp2, Atmp_QA, x_ptr[ subfr_length - n + k ] );              /* Q(QA-16) */
                 }
-                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                    /* Q(16-rshifts) */
+                tmp1 = silk_LSHIFT32( -tmp1, 32 - QA - rshifts );                                   /* Q(16-rshifts) */
                 tmp2 = silk_LSHIFT32( -tmp2, 32 - QA - rshifts );                                    /* Q(16-rshifts) */
                 for( k = 0; k <= n; k++ ) {
-                    CAf[ k ] = silk_SMLAWB( CAf[ k ], tmp1, x_ptr[ n - k ] );                        /* Q( -rshift ) */
-                    CAb[ k ] = silk_SMLAWB( CAb[ k ], tmp2, x_ptr[ subfr_length - n + k - 1 ] );     /* Q( -rshift ) */
+                    g[ k ] = silk_SMLAWB( silk_SMLAWB( g[ k ], tmp1, x_ptr[ n - k ] ),               /* Q( -rshift ) */
+                                                               tmp2, x_ptr[ subfr_length - n + k - 1 ] );
                 }
             }
         } else {
            for( s = 0; s < nb_subfr; s++ ) {
                 x_ptr = x + s * subfr_length;
-                x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts );                             /* Q( -rshifts ) */
+                x1 = -silk_LSHIFT32( (opus_int32)x_ptr[ n ], -rshifts );                            /* Q( -rshifts ) */
                 x2 = -silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], -rshifts );          /* Q( -rshifts ) */
                 tmp1 = silk_LSHIFT32( (opus_int32)x_ptr[ n ], 17 );                                  /* Q17 */
                 tmp2 = silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n - 1 ], 17 );               /* Q17 */
                 for( k = 0; k < n; k++ ) {
-                    C_first_row[ k ] = silk_MLA( C_first_row[ k ], x1, x_ptr[ n - k - 1 ] );         /* Q( -rshifts ) */
-                    C_last_row[ k ]  = silk_MLA( C_last_row[ k ],  x2, x_ptr[ subfr_length - n + k ] ); /* Q( -rshifts ) */
+                    c[ k + 1 ] = silk_MLA( silk_MLA( c[ k + 1 ], x1, x_ptr[ n - k - 1 ] ),           /* Q( -rshifts ) */
+                                                                 x2, x_ptr[ subfr_length - n + k ] );
                     Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 17 );                                /* Q17 */
                     tmp1 = silk_MLA( tmp1, x_ptr[ n - k - 1 ], Atmp1 );                              /* Q17 */
                     tmp2 = silk_MLA( tmp2, x_ptr[ subfr_length - n + k ], Atmp1 );                   /* Q17 */
@@ -156,50 +182,44 @@ void silk_burg_modified_c(
                 tmp1 = -tmp1;                                                                        /* Q17 */
                 tmp2 = -tmp2;                                                                        /* Q17 */
                 for( k = 0; k <= n; k++ ) {
-                    CAf[ k ] = silk_SMLAWW( CAf[ k ], tmp1,
-                        silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) );                 /* Q( -rshift ) */
-                    CAb[ k ] = silk_SMLAWW( CAb[ k ], tmp2,
-                        silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) ); /* Q( -rshift ) */
+                    g[ k ] = silk_SMLAWW( silk_SMLAWW( g[ k ],                                       /* Q( -rshift ) */
+                        tmp1, silk_LSHIFT32( (opus_int32)x_ptr[ n - k ], -rshifts - 1 ) ),
+                        tmp2, silk_LSHIFT32( (opus_int32)x_ptr[ subfr_length - n + k - 1 ], -rshifts - 1 ) );
                 }
             }
         }
 
         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
-        tmp1 = C_first_row[ n ];                                                                     /* Q( -rshifts ) */
-        tmp2 = C_last_row[ n ];                                                                      /* Q( -rshifts ) */
+        tmp1 = c[ n + 1 ];                                                                           /* Q( -rshifts ) */
         num  = 0;                                                                                    /* Q( -rshifts ) */
-        nrg  = silk_ADD32( CAb[ 0 ], CAf[ 0 ] );                                                     /* Q( 1-rshifts ) */
+        nrg  = g[ 0 ];                                                                               /* Q( -rshifts ) */
         for( k = 0; k < n; k++ ) {
             Atmp_QA = Af_QA[ k ];
             lz = silk_CLZ32( silk_abs( Atmp_QA ) ) - 1;
             lz = silk_min( 32 - QA, lz );
             Atmp1 = silk_LSHIFT32( Atmp_QA, lz );                                                    /* Q( QA + lz ) */
-
-            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( C_last_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz );  /* Q( -rshifts ) */
-            tmp2 = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( C_first_row[ n - k - 1 ], Atmp1 ), 32 - QA - lz ); /* Q( -rshifts ) */
-            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( CAb[ n - k ], Atmp1 ), 32 - QA - lz );       /* Q( -rshifts ) */
-            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( silk_ADD32( CAb[ k + 1 ], CAf[ k + 1 ] ),
-                                                                                  Atmp1 ), 32 - QA - lz ); /* Q( 1-rshifts ) */
+            tmp1 = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( c[ n - k ], Atmp1 ), 32 - QA - lz );         /* Q( -rshifts ) */
+            num  = silk_ADD_LSHIFT32( num,  silk_SMMUL( g[ n - k ], Atmp1 ), 32 - QA - lz );         /* Q( -rshifts ) */
+            nrg  = silk_ADD_LSHIFT32( nrg,  silk_SMMUL( g[ k + 1 ], Atmp1 ), 32 - QA - lz );         /* Q( -rshifts ) */
         }
-        CAf[ n + 1 ] = tmp1;                                                                         /* Q( -rshifts ) */
-        CAb[ n + 1 ] = tmp2;                                                                         /* Q( -rshifts ) */
-        num = silk_ADD32( num, tmp2 );                                                               /* Q( -rshifts ) */
-        num = silk_LSHIFT32( -num, 1 );                                                              /* Q( 1-rshifts ) */
+        g[ n + 1 ] = tmp1;                                                                           /* Q( -rshifts ) */
+        num = silk_ADD32( num, tmp1 );                                                               /* Q( -rshifts ) */
+        silk_assert( nrg > 0 );
 
         /* Calculate the next order reflection (parcor) coefficient */
         if( silk_abs( num ) < nrg ) {
-            rc_Q31 = silk_DIV32_varQ( num, nrg, 31 );
+            rc_Q31 = -silk_DIV32_varQ( num, nrg, 31 );
         } else {
-            rc_Q31 = ( num > 0 ) ? silk_int32_MAX : silk_int32_MIN;
+            rc_Q31 = ( num > 0 ) ? silk_int32_MIN : silk_int32_MAX;
        }
 
         /* Update inverse prediction gain */
-        tmp1 = ( (opus_int32)1 << 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
+        tmp1 = SILK_FIX_CONST( 1, 30 ) - silk_SMMUL( rc_Q31, rc_Q31 );
         tmp1 = silk_LSHIFT( silk_SMMUL( invGain_Q30, tmp1 ), 2 );
         if( tmp1 <= minInvGain_Q30 ) {
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
-            tmp2 = ( (opus_int32)1 << 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );     /* Q30 */
-            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                       /* Q15 */
+            tmp2 = SILK_FIX_CONST( 1, 30 ) - silk_DIV32_varQ( minInvGain_Q30, invGain_Q30, 30 );    /* Q30 */
+            rc_Q31 = silk_SQRT_APPROX( tmp2 );                                                      /* Q15 */
             if( rc_Q31 > 0 ) {
                 /* Newton-Raphson iteration */
                 rc_Q31 = silk_RSHIFT32( rc_Q31 + silk_DIV32( tmp2, rc_Q31 ), 1 );                    /* Q15 */
@@ -217,12 +237,12 @@ void silk_burg_modified_c(
 
         /* Update the AR coefficients */
         for( k = 0; k < (n + 1) >> 1; k++ ) {
-            tmp1                = Af_QA[ k ];                                                        /* QA */
-            tmp2                = Af_QA[ n - k - 1 ];                                                /* QA */
-            Af_QA[ k ]          = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );          /* QA */
-            Af_QA[ n - k - 1 ]  = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );          /* QA */
+            tmp1               = Af_QA[ k ];                                                         /* QA */
+            tmp2               = Af_QA[ n - k - 1 ];                                                 /* QA */
+            Af_QA[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );           /* QA */
+            Af_QA[ n - k - 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );           /* QA */
         }
-        Af_QA[ n ]              = silk_RSHIFT32( rc_Q31, 31 - QA );                                  /* QA */
+        Af_QA[ n ]         = silk_RSHIFT32( rc_Q31, 31 - QA );                                       /* QA */
 
         if( reached_max_gain ) {
             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
@@ -231,47 +251,27 @@ void silk_burg_modified_c(
             }
             break;
         }
-
-        /* Update C * Af and C * Ab */
-        for( k = 0; k <= n + 1; k++ ) {
-            tmp1 = CAf[ k ];                                                                         /* Q( -rshifts ) */
-            tmp2 = CAb[ n - k + 1 ];                                                                 /* Q( -rshifts ) */
-            CAf[ k ]         = silk_ADD_LSHIFT32( tmp1, silk_SMMUL( tmp2, rc_Q31 ), 1 );             /* Q( -rshifts ) */
-            CAb[ n - k + 1 ] = silk_ADD_LSHIFT32( tmp2, silk_SMMUL( tmp1, rc_Q31 ), 1 );             /* Q( -rshifts ) */
-        }
     }
 
-    if( reached_max_gain ) {
-        for( k = 0; k < D; k++ ) {
-            /* Scale coefficients */
-            A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
-        }
-        /* Subtract energy of preceding samples from C0 */
-        if( rshifts > 0 ) {
-            for( s = 0; s < nb_subfr; s++ ) {
-                x_ptr = x + s * subfr_length;
-                C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
-            }
-        } else {
-            for( s = 0; s < nb_subfr; s++ ) {
-                x_ptr = x + s * subfr_length;
-                C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
-            }
+    for( k = 0; k < D; k++ ) {
+        /* Scale coefficients */
+        A_Q16[ k ] = -silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );
+    }
+
+    /* Subtract energy of preceding samples from C0 */
+    rshifts--;     /* divide c0 by two */
+    if( rshifts > 0 ) {
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            C0 -= (opus_int32)silk_RSHIFT64( silk_inner_prod16_aligned_64( x_ptr, x_ptr, D, arch ), rshifts );
         }
-        /* Approximate residual energy */
-        *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
-        *res_nrg_Q = -rshifts;
     } else {
-        /* Return residual energy */
-        nrg  = CAf[ 0 ];                                                                             /* Q( -rshifts ) */
-        tmp1 = (opus_int32)1 << 16;                                                                  /* Q16 */
-        for( k = 0; k < D; k++ ) {
-            Atmp1 = silk_RSHIFT_ROUND( Af_QA[ k ], QA - 16 );                                        /* Q16 */
-            nrg  = silk_SMLAWW( nrg, CAf[ k + 1 ], Atmp1 );                                          /* Q( -rshifts ) */
-            tmp1 = silk_SMLAWW( tmp1, Atmp1, Atmp1 );                                                /* Q16 */
-            A_Q16[ k ] = -Atmp1;
+        for( s = 0; s < nb_subfr; s++ ) {
+            x_ptr = x + s * subfr_length;
+            C0 -= silk_LSHIFT32( silk_inner_prod_aligned( x_ptr, x_ptr, D, arch), -rshifts);
         }
-        *res_nrg = silk_SMLAWW( nrg, silk_SMMUL( SILK_FIX_CONST( FIND_LPC_COND_FAC, 32 ), C0 ), -tmp1 );/* Q( -rshifts ) */
-        *res_nrg_Q = -rshifts;
     }
+    /* Approximate residual energy */
+    *res_nrg = silk_LSHIFT( silk_SMMUL( invGain_Q30, C0 ), 2 );
+    *res_nrg_Q = -rshifts;
 }
diff --git a/silk/float/burg_modified_FLP.c b/silk/float/burg_modified_FLP.c
index ea5dc25a..7ef4cac2 100644
--- a/silk/float/burg_modified_FLP.c
+++ b/silk/float/burg_modified_FLP.c
@@ -33,11 +33,11 @@ POSSIBILITY OF SUCH DAMAGE.
 #include "tuning_parameters.h"
 #include "define.h"
 
-#define MAX_FRAME_SIZE 384 /* subfr_length * nb_subfr = ( 0.005 * 16000 + 16 ) * 4 = 384*/
+/* This code implements the method from https://www.opus-codec.org/docs/vos_fastburg.pdf */
 
 /* Compute reflection coefficients from input signal */
-silk_float silk_burg_modified_FLP(          /* O    returns residual energy                                     */
-    silk_float          A[],                /* O    prediction coefficients (length order)                      */
+silk_float silk_burg_modified_FLP(
+    silk_float          af[],               /* O    prediction coefficients (length order)                      */
     const silk_float    x[],                /* I    input signal, length: nb_subfr*(D+L_sub)                    */
     const silk_float    minInvGain,         /* I    minimum inverse prediction gain                             */
     const opus_int      subfr_length,       /* I    input signal subframe length (incl. D preceding samples)    */
@@ -46,75 +46,81 @@ silk_float silk_burg_modified_FLP(          /* O    returns residual energy
 )
 {
     opus_int         k, n, s, reached_max_gain;
-    double           C0, invGain, num, nrg_f, nrg_b, rc, Atmp, tmp1, tmp2;
+    double           invGain, num, nrg, rc, tmp1, tmp2, x1, x2, atmp;
     const silk_float *x_ptr;
-    double           C_first_row[ SILK_MAX_ORDER_LPC ], C_last_row[ SILK_MAX_ORDER_LPC ];
-    double           CAf[ SILK_MAX_ORDER_LPC + 1 ], CAb[ SILK_MAX_ORDER_LPC + 1 ];
-    double           Af[ SILK_MAX_ORDER_LPC ];
-
-    silk_assert( subfr_length * nb_subfr <= MAX_FRAME_SIZE );
+    double           c[ SILK_MAX_ORDER_LPC + 1 ];
+    double           g[ SILK_MAX_ORDER_LPC + 1 ];
+    double           a[ SILK_MAX_ORDER_LPC ];
 
     /* Compute autocorrelations, added over subframes */
-    C0 = silk_energy_FLP( x, nb_subfr * subfr_length );
-    silk_memset( C_first_row, 0, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    silk_memset( c, 0, (D + 1) * sizeof( double ) );
     for( s = 0; s < nb_subfr; s++ ) {
         x_ptr = x + s * subfr_length;
-        for( n = 1; n < D + 1; n++ ) {
-            C_first_row[ n - 1 ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
+        for( n = 0; n < D + 1; n++ ) {
+            c[ n ] += silk_inner_product_FLP( x_ptr, x_ptr + n, subfr_length - n );
         }
     }
-    silk_memcpy( C_last_row, C_first_row, SILK_MAX_ORDER_LPC * sizeof( double ) );
+    for( n = 0; n < D + 1; n++ ) {
+        c[ n ] *= 2.0;
+    }
 
     /* Initialize */
-    CAb[ 0 ] = CAf[ 0 ] = C0 + FIND_LPC_COND_FAC * C0 + 1e-9f;
-    invGain = 1.0f;
+    c[ 0 ] += FIND_LPC_COND_FAC * c[ 0 ] + 1e-9f ;
+    g[ 0 ] = c[ 0 ];
+    tmp1 = 0.0f;
+    for( s = 0; s < nb_subfr; s++ ) {
+        x_ptr = x + s * subfr_length;
+        x1 = x_ptr[ 0 ];
+        x2 = x_ptr[ subfr_length - 1 ];
+        tmp1 += x1 * x1 + x2 * x2;
+    }
+    g[ 0 ] -= tmp1;
+    g[ 1 ] = c[ 1 ];
+    rc = - g[ 1 ] / g[ 0 ];
+    silk_assert( rc > -1.0 && rc < 1.0 );
+    a[ 0 ] = rc;
+    invGain = ( 1.0 - rc * rc );
     reached_max_gain = 0;
-    for( n = 0; n < D; n++ ) {
-        /* Update first row of correlation matrix (without first element) */
-        /* Update last row of correlation matrix (without last element, stored in reversed order) */
-        /* Update C * Af */
-        /* Update C * flipud(Af) (stored in reversed order) */
+    for( n = 1; n < D; n++ ) {
+        for( k = 0; k < (n >> 1) + 1; k++ ) {
+            tmp1 = g[ k ];
+            tmp2 = g[ n - k ];
+            g[ k ]     = tmp1 + rc * tmp2;
+            g[ n - k ] = tmp2 + rc * tmp1;
+        }
         for( s = 0; s < nb_subfr; s++ ) {
             x_ptr = x + s * subfr_length;
-            tmp1 = x_ptr[ n ];
-            tmp2 = x_ptr[ subfr_length - n - 1 ];
+            x1 = x_ptr[ n ];
+            x2 = x_ptr[ subfr_length - n - 1 ];
+            tmp1 = x1;
+            tmp2 = x2;
             for( k = 0; k < n; k++ ) {
-                C_first_row[ k ] -= x_ptr[ n ] * x_ptr[ n - k - 1 ];
-                C_last_row[ k ]  -= x_ptr[ subfr_length - n - 1 ] * x_ptr[ subfr_length - n + k ];
-                Atmp = Af[ k ];
-                tmp1 += x_ptr[ n - k - 1 ] * Atmp;
-                tmp2 += x_ptr[ subfr_length - n + k ] * Atmp;
+                atmp = a[ k ];
+                c[ k + 1 ] -= x1 * x_ptr[ n - k - 1 ] + x2 * x_ptr[ subfr_length - n + k ];
+                tmp1 += x_ptr[ n - k - 1 ] * atmp;
+                tmp2 += x_ptr[ subfr_length - n + k ] * atmp;
             }
             for( k = 0; k <= n; k++ ) {
-                CAf[ k ] -= tmp1 * x_ptr[ n - k ];
-                CAb[ k ] -= tmp2 * x_ptr[ subfr_length - n + k - 1 ];
+                g[ k ] -= tmp1 * x_ptr[ n - k ] + tmp2 * x_ptr[ subfr_length - n + k - 1 ];
             }
         }
-        tmp1 = C_first_row[ n ];
-        tmp2 = C_last_row[ n ];
-        for( k = 0; k < n; k++ ) {
-            Atmp = Af[ k ];
-            tmp1 += C_last_row[ n - k - 1 ] * Atmp;
-            tmp2 += C_first_row[ n - k - 1 ] * Atmp;
-        }
-        CAf[ n + 1 ] = tmp1;
-        CAb[ n + 1 ] = tmp2;
 
         /* Calculate nominator and denominator for the next order reflection (parcor) coefficient */
-        num   = CAb[ n + 1 ];
-        nrg_b = CAb[ 0 ];
-        nrg_f = CAf[ 0 ];
+        tmp1 = c[ n + 1 ];
+        num  = 0.0f;
+        nrg  = g[ 0 ];
         for( k = 0; k < n; k++ ) {
-            Atmp = Af[ k ];
-            num   += CAb[ n - k ] * Atmp;
-            nrg_b += CAb[ k + 1 ] * Atmp;
-            nrg_f += CAf[ k + 1 ] * Atmp;
+            atmp = a[ k ];
+            tmp1 += c[ n - k ] * atmp;
+            num  += g[ n - k ] * atmp;
+            nrg  += g[ k + 1 ] * atmp;
         }
-        silk_assert( nrg_f > 0.0 );
-        silk_assert( nrg_b > 0.0 );
+        g[ n + 1] = tmp1;
+        num += tmp1;
+        silk_assert( nrg > 0.0 );
 
         /* Calculate the next order reflection (parcor) coefficient */
-        rc = -2.0 * num / ( nrg_f + nrg_b );
+        rc = -num / nrg;
         silk_assert( rc > -1.0 && rc < 1.0 );
 
         /* Update inverse prediction gain */
@@ -123,7 +129,7 @@ silk_float silk_burg_modified_FLP(          /* O    returns residual energy
             /* Max prediction gain exceeded; set reflection coefficient such that max prediction gain is exactly hit */
             rc = sqrt( 1.0 - minInvGain / invGain );
             if( num > 0 ) {
-                /* Ensure adjusted reflection coefficients has the original sign */
+                /* Ensure adjusted reflection coefficient has the original sign */
                 rc = -rc;
             }
             invGain = minInvGain;
@@ -134,53 +140,35 @@ silk_float silk_burg_modified_FLP(          /* O    returns residual energy
 
         /* Update the AR coefficients */
         for( k = 0; k < (n + 1) >> 1; k++ ) {
-            tmp1 = Af[ k ];
-            tmp2 = Af[ n - k - 1 ];
-            Af[ k ]         = tmp1 + rc * tmp2;
-            Af[ n - k - 1 ] = tmp2 + rc * tmp1;
+            tmp1 = a[ k ];
+            tmp2 = a[ n - k - 1 ];
+            a[ k ]         = tmp1 + rc * tmp2;
+            a[ n - k - 1 ] = tmp2 + rc * tmp1;
         }
-        Af[ n ] = rc;
+        a[ n ] = rc;
 
         if( reached_max_gain ) {
             /* Reached max prediction gain; set remaining coefficients to zero and exit loop */
             for( k = n + 1; k < D; k++ ) {
-                Af[ k ] = 0.0;
+                a[ k ] = 0.0;
             }
             break;
         }
-
-        /* Update C * Af and C * Ab */
-        for( k = 0; k <= n + 1; k++ ) {
-            tmp1 = CAf[ k ];
-            CAf[ k ]         += rc * CAb[ n - k + 1 ];
-            CAb[ n - k + 1 ] += rc * tmp1;
-        }
     }
 
-    if( reached_max_gain ) {
-        /* Convert to silk_float */
-        for( k = 0; k < D; k++ ) {
-            A[ k ] = (silk_float)( -Af[ k ] );
-        }
-        /* Subtract energy of preceding samples from C0 */
-        for( s = 0; s < nb_subfr; s++ ) {
-            C0 -= silk_energy_FLP( x + s * subfr_length, D );
-        }
-        /* Approximate residual energy */
-        nrg_f = C0 * invGain;
-    } else {
-        /* Compute residual energy and store coefficients as silk_float */
-        nrg_f = CAf[ 0 ];
-        tmp1  = 1.0;
-        for( k = 0; k < D; k++ ) {
-            Atmp = Af[ k ];
-            nrg_f += CAf[ k + 1 ] * Atmp;
-            tmp1  += Atmp * Atmp;
-            A[ k ] = (silk_float)(-Atmp);
-        }
-        nrg_f -= FIND_LPC_COND_FAC * C0 * tmp1;
+    /* Convert to silk_float */
+    for( k = 0; k < D; k++ ) {
+        af[ k ] = (silk_float)( -a[ k ] );
+    }
+
+    nrg = c[ 0 ] * 0.5 * (1.0 - FIND_LPC_COND_FAC);
+    /* Subtract energy of preceding samples from C0 */
+    for( s = 0; s < nb_subfr; s++ ) {
+        nrg -= silk_energy_FLP( x + s * subfr_length, D );
     }
+    /* Approximate residual energy */
+    nrg *= invGain;
 
-    /* Return residual energy */
-    return (silk_float)nrg_f;
+    /* Return approximate residual energy */
+    return (silk_float)nrg;
 }
diff --git a/src/opus_demo.c b/src/opus_demo.c
index 9d5c7939..d73af551 100644
--- a/src/opus_demo.c
+++ b/src/opus_demo.c
@@ -243,7 +243,7 @@ int main(int argc, char *argv[])
     int stop=0;
     short *in, *out;
     int application=OPUS_APPLICATION_AUDIO;
-    double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, nrg;
+    double bits=0.0, bits_max=0.0, bits_act=0.0, bits2=0.0, bits_act2=0.0, nrg;
     double tot_samples=0;
     opus_uint64 tot_in, tot_out;
     int bandwidth=OPUS_AUTO;
@@ -849,6 +849,7 @@ int main(int argc, char *argv[])
             nrg /= frame_size * channels;
             if( nrg > 1e5 ) {
                 bits_act += len[toggle]*8;
+                bits_act2 += len[toggle]*len[toggle]*64;
                 count_act++;
             }
         }
@@ -867,9 +868,11 @@ int main(int argc, char *argv[])
                      1e-3*bits_max*sampling_rate/frame_size);
     if (!decode_only)
        fprintf (stderr, "active bitrate:              %7.3f kb/s\n",
-                        1e-3*bits_act*sampling_rate/(1e-15+frame_size*(double)count_act));
+                        1e-3*bits_act*sampling_rate/(frame_size*(double)count_act));
     fprintf (stderr, "bitrate standard deviation:  %7.3f kb/s\n",
                      1e-3*sqrt(bits2/count - bits*bits/(count*(double)count))*sampling_rate/frame_size);
+    fprintf (stderr, "active standard deviation:   %7.3f kb/s\n",
+                     1e-3*sqrt(bits_act2/count_act - bits_act*bits_act/(count_act*(double)count_act))*sampling_rate/frame_size);
     /* Close any files to which intermediate results were stored */
     SILK_DEBUG_STORE_CLOSE_FILES
     silk_TimerSave("opus_timing.txt");
-- 
cgit v1.2.1
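
Editor's sketch, not part of the patch: the C program below is an illustrative, single-frame floating-point rendering of the recursion this patch introduces in burg_modified_FLP.c (the method referenced at https://www.opus-codec.org/docs/vos_fastburg.pdf). It is a reading aid under simplifying assumptions: one subframe, no FIND_LPC_COND_FAC conditioning, no maximum-prediction-gain limiting, and none of the fixed-point scaling of the FIX version. The names fast_burg, N and D are invented for the example and do not exist in the Opus tree.

/* fast_burg_sketch.c -- illustrative only; build with: cc -O2 fast_burg_sketch.c -lm */
#include <stdio.h>
#include <math.h>

#define N 320                      /* frame length for the demo (assumption)     */
#define D 16                       /* prediction order for the demo (assumption) */

/* Burg analysis of one frame x[0..len-1]; writes af[0..order-1] using the patch's
 * sign convention (x[i] is predicted as af[0]*x[i-1] + ...) and returns a rough
 * residual-energy estimate, mirroring the patched FLP code.  Requires order <= D. */
static double fast_burg( double af[], const double x[], int len, int order )
{
    double c[ D + 1 ], g[ D + 1 ], a[ D ];
    double rc, invGain, num, nrg, tmp1, tmp2, x1, x2;
    int    k, n, i;

    /* Autocorrelations of the whole frame, multiplied by 2 */
    for( n = 0; n <= order; n++ ) {
        c[ n ] = 0.0;
        for( i = 0; i < len - n; i++ ) {
            c[ n ] += x[ i ] * x[ i + n ];
        }
        c[ n ] *= 2.0;
    }
    c[ 0 ] += 1e-9;

    /* First reflection coefficient; g[0] excludes the first and last sample */
    g[ 0 ] = c[ 0 ] - x[ 0 ] * x[ 0 ] - x[ len - 1 ] * x[ len - 1 ];
    g[ 1 ] = c[ 1 ];
    rc = -g[ 1 ] / g[ 0 ];
    a[ 0 ] = rc;
    invGain = 1.0 - rc * rc;

    for( n = 1; n < order; n++ ) {
        /* Fold the previous reflection coefficient into g[] */
        for( k = 0; k <= ( n >> 1 ); k++ ) {
            tmp1 = g[ k ];
            tmp2 = g[ n - k ];
            g[ k ]     = tmp1 + rc * tmp2;
            g[ n - k ] = tmp2 + rc * tmp1;
        }
        /* Correct c[] and g[] for the samples leaving the analysis window */
        x1 = x[ n ];
        x2 = x[ len - n - 1 ];
        tmp1 = x1;
        tmp2 = x2;
        for( k = 0; k < n; k++ ) {
            c[ k + 1 ] -= x1 * x[ n - k - 1 ] + x2 * x[ len - n + k ];
            tmp1 += x[ n - k - 1 ] * a[ k ];
            tmp2 += x[ len - n + k ] * a[ k ];
        }
        for( k = 0; k <= n; k++ ) {
            g[ k ] -= tmp1 * x[ n - k ] + tmp2 * x[ len - n + k - 1 ];
        }
        /* Next reflection coefficient from c[] and g[] */
        tmp1 = c[ n + 1 ];
        num  = 0.0;
        nrg  = g[ 0 ];
        for( k = 0; k < n; k++ ) {
            tmp1 += c[ n - k ] * a[ k ];
            num  += g[ n - k ] * a[ k ];
            nrg  += g[ k + 1 ] * a[ k ];
        }
        g[ n + 1 ] = tmp1;
        num += tmp1;
        rc = -num / nrg;
        invGain *= 1.0 - rc * rc;
        /* Order update of the prediction coefficients */
        for( k = 0; k < ( ( n + 1 ) >> 1 ); k++ ) {
            tmp1 = a[ k ];
            tmp2 = a[ n - k - 1 ];
            a[ k ]         = tmp1 + rc * tmp2;
            a[ n - k - 1 ] = tmp2 + rc * tmp1;
        }
        a[ n ] = rc;
    }

    /* Negate, as in the patch, and estimate the residual energy */
    for( k = 0; k < order; k++ ) {
        af[ k ] = -a[ k ];
    }
    nrg = 0.5 * c[ 0 ];
    for( i = 0; i < order; i++ ) {
        nrg -= x[ i ] * x[ i ];        /* energy of the preceding samples */
    }
    return nrg * invGain;
}

int main( void )
{
    double x[ N ], af[ D ];
    int    i;
    /* Stable AR(2)-flavoured test signal */
    x[ 0 ] = 1.0;
    x[ 1 ] = 0.5;
    for( i = 2; i < N; i++ ) {
        x[ i ] = 1.8 * x[ i - 1 ] - 0.9 * x[ i - 2 ] + 0.01 * sin( 0.7 * i );
    }
    printf( "approx. residual energy: %g\n", fast_burg( af, x, N, D ) );
    for( i = 0; i < D; i++ ) {
        printf( "af[ %2d ] = % .6f\n", i, af[ i ] );
    }
    return 0;
}

The visible structural change in the patch is the same as in this sketch: a single pair of arrays c[] and g[] replaces the four arrays C_first_row/C_last_row and CAf/CAb of the removed code, and each order's reflection coefficient is computed directly from them.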