From f02ad480f044d544ddc728db27ef69b109c1c920 Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Fri, 18 Nov 2016 16:56:35 -0500 Subject: Fixing bandwidth detection for 24 kHz analysis --- src/analysis.c | 24 +++++++++++++++++++----- src/analysis.h | 5 +++-- src/opus_encoder.c | 25 +++++++++++++++++-------- src/opus_private.h | 6 +++--- 4 files changed, 42 insertions(+), 18 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index 7eb229e1..1c12aa24 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -104,7 +104,7 @@ static const int tbands[NB_TBANDS+1] = { }; static const int extra_bands[NB_TOT_BANDS+1] = { - 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120, 160, 200 + 1, 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 }; /*static const float tweight[NB_TBANDS+1] = { @@ -225,6 +225,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt float noise_floor; int remaining; AnalysisInfo *info; + float hp_ener; SAVE_STACK; tonal->last_transition++; @@ -241,7 +242,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt kfft = celt_mode->mdct.kfft[0]; if (tonal->count==0) tonal->mem_fill = 240; - downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); + tonal->hp_ener_accum += downmix(x, &tonal->inmem[tonal->mem_fill], IMIN(len, ANALYSIS_BUF_SIZE-tonal->mem_fill), offset, c1, c2, C); if (tonal->mem_fill+len < ANALYSIS_BUF_SIZE) { tonal->mem_fill += len; @@ -249,6 +250,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt RESTORE_STACK; return; } + hp_ener = tonal->hp_ener_accum; info = &tonal->info[tonal->write_pos++]; if (tonal->write_pos>=DETECT_SIZE) tonal->write_pos-=DETECT_SIZE; @@ -267,7 +269,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt } OPUS_MOVE(tonal->inmem, tonal->inmem+ANALYSIS_BUF_SIZE-240, 240); remaining = len - (ANALYSIS_BUF_SIZE-tonal->mem_fill); - downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); + tonal->hp_ener_accum = downmix(x, &tonal->inmem[240], remaining, offset+ANALYSIS_BUF_SIZE-tonal->mem_fill, c1, c2, C); tonal->mem_fill = 240 + remaining; opus_fft(kfft, in, out, tonal->arch); #ifndef FIXED_POINT @@ -417,8 +419,8 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt float E=0; int band_start, band_end; /* Keep a margin of 300 Hz for aliasing */ - band_start = extra_bands[b]; - band_end = extra_bands[b+1]; + band_start = 2*extra_bands[b]; + band_end = 2*extra_bands[b+1]; for (i=band_start;i.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*(band_end-band_start)) bandwidth = b; } + /* Special case for the last two bands, for which we don't have spectrum but only + the energy above 12 kHz. */ + { + float E = hp_ener*(1./(240*240)); + maxE = MAX32(maxE, E); + tonal->meanE[b] = MAX32((1-alphaE2)*tonal->meanE[b], E); + E = MAX32(E, tonal->meanE[b]); + /* Use a simple follower with 13 dB/Bark slope for spreading function */ + bandwidth_mask = MAX32(.05f*bandwidth_mask, E); + if (E>.1*bandwidth_mask && E*1e9f > maxE && E > noise_floor*160) + bandwidth = 20; + } if (tonal->count<=2) bandwidth = 20; frame_loudness = 20*(float)log10(frame_loudness); diff --git a/src/analysis.h b/src/analysis.h index 86bd6340..5ed791b1 100644 --- a/src/analysis.h +++ b/src/analysis.h @@ -33,7 +33,7 @@ #define NB_FRAMES 8 #define NB_TBANDS 18 -#define NB_TOT_BANDS 21 +#define NB_TOT_BANDS 19 #define ANALYSIS_BUF_SIZE 720 /* 15 ms at 48 kHz */ #define DETECT_SIZE 200 @@ -51,7 +51,7 @@ typedef struct { float E[NB_FRAMES][NB_TBANDS]; float lowE[NB_TBANDS]; float highE[NB_TBANDS]; - float meanE[NB_TOT_BANDS]; + float meanE[NB_TOT_BANDS+1]; float mem[32]; float cmean[8]; float std[9]; @@ -76,6 +76,7 @@ typedef struct { int write_pos; int read_pos; int read_subframe; + float hp_ener_accum; AnalysisInfo info[DETECT_SIZE]; } TonalityAnalysisState; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index f64bd346..3bbd5700 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -579,7 +579,7 @@ static opus_int32 user_bitrate_to_bitrate(OpusEncoder *st, int frame_size, int m #endif #ifndef FIXED_POINT -void silk_resampler_down2_float( +float silk_resampler_down2_float( opus_val32 *S, /* I/O State vector [ 2 ] */ opus_val16 *out, /* O Output signal [ floor(len/2) ] */ const opus_val16 *in, /* I Input signal [ len ] */ @@ -587,8 +587,8 @@ void silk_resampler_down2_float( ) { int k, len2 = inLen/2; - opus_val32 in32, out32, Y, X; - + opus_val32 in32, out32, out32_hp, Y, X; + float hp_ener = 0; /* Internal variables and state are in Q10 format */ for( k = 0; k < len2; k++ ) { /* Convert to Q10 */ @@ -599,7 +599,7 @@ void silk_resampler_down2_float( X = 0.6074371f*Y; out32 = ADD32( S[ 0 ], X ); S[ 0 ] = ADD32( in32, X ); - + out32_hp = out32; /* Convert to Q10 */ in32 = in[ 2 * k + 1 ]; @@ -610,13 +610,21 @@ void silk_resampler_down2_float( out32 = ADD32( out32, X ); S[ 1 ] = ADD32( in32, X ); + Y = SUB32( -in32, S[ 2 ] ); + X = 0.15063f*Y; + out32_hp = ADD32( out32_hp, S[ 2 ] ); + out32_hp = ADD32( out32_hp, X ); + S[ 2 ] = ADD32( -in32, X ); + + hp_ener += out32_hp*out32_hp; /* Add, convert back to int16 and store to output */ out[ k ] = .5*out32; } + return hp_ener; } #endif -void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) +opus_val32 downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, int c1, int c2, int C) { const float *x; opus_val32 scale; @@ -648,11 +656,12 @@ void downmix_float(const void *_x, opus_val32 *sub, int subframe, int offset, in scale /= 2; for (j=0;j