From dc6a6e333d25814d6fcbc4043d894b35bc6c237a Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Mon, 14 Nov 2011 17:58:29 +0800 Subject: Cleans up the most ugly parts of the analysis code Should be mostly usable now. --- celt/celt.c | 96 +++++++++++++++++++++--------------------- celt/celt.h | 18 +++++++- src/analysis.c | 119 ++++++++++++++++++++++++++++++++--------------------- src/opus_encoder.c | 41 +++++++++++++----- 4 files changed, 166 insertions(+), 108 deletions(-) diff --git a/celt/celt.c b/celt/celt.c index 5af041f9..7e5866ff 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -177,8 +177,7 @@ struct OpusCustomEncoder { int prefilter_tapset_old; #endif int consec_transient; - int frame_tonality; - int tonality_slope; + AnalysisInfo analysis; opus_val32 preemph_memE[2]; opus_val32 preemph_memD[2]; @@ -701,9 +700,6 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, return tf_select; } -extern int boost_band[2]; -extern float boost_amount[2]; - static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, int tf_select, ec_enc *enc) { int curr, i; @@ -795,7 +791,7 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C) } static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, - const opus_val16 *bandLogE, int end, int LM, int C, int N0, float tonality_slope) + const opus_val16 *bandLogE, int end, int LM, int C, int N0, AnalysisInfo *analysis) { int i; opus_val32 diff=0; @@ -836,7 +832,6 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, result of a bug in the loop above */ diff /= 2*C*(end-1); /*printf("%f\n", diff);*/ -#if 1 if (diff > QCONST16(2.f, DB_SHIFT)) trim_index--; if (diff > QCONST16(8.f, DB_SHIFT)) @@ -845,23 +840,23 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim_index++; if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; +#ifndef FIXED_POINT + if (0 && analysis->valid) + { + if (analysis->tonality_slope > .15) + trim_index--; + if (analysis->tonality_slope > .3) + trim_index--; + if (analysis->tonality_slope < -.15) + trim_index++; + if (analysis->tonality_slope < -.3) + trim_index++; + } #endif -#if 0 - if (tonality_slope > .15) - trim_index--; - if (tonality_slope > .3) - trim_index--; - if (tonality_slope < -.15) - trim_index++; - if (tonality_slope < -.3) - trim_index++; -#endif - //printf("%f\n", tonality_slope); if (trim_index<0) trim_index = 0; if (trim_index>10) trim_index = 10; - //printf("%f %d\n", tonality_slope, trim_index); #ifdef FUZZING trim_index = rand()%11; #endif @@ -1309,6 +1304,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f st->spread_decision = spreading_decision(st->mode, X, &st->tonal_average, st->spread_decision, &st->hf_average, &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + /*printf("%f %d\n", st->analysis.tonality_slope, st->tapset_decision);*/ /*if (st->frame_tonality > .7*32768) st->spread_decision = SPREAD_NONE; else if (st->frame_tonality > .3*32768) @@ -1362,18 +1358,19 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f #endif } } - if (0) +#ifndef FIXED_POINT + if (0 && st->analysis.valid) { - if (boost_amount[0]>.2) - offsets[boost_band[0]]+=2; - if (boost_amount[0]>.4) - offsets[boost_band[0]]+=2; - if (boost_amount[1]>.2) - offsets[boost_band[1]]+=2; - if (boost_amount[1]>.4) - offsets[boost_band[1]]+=2; - //printf("%f %f\n", boost_amount[0], boost_amount[1]); + if (st->analysis.boost_amount[0]>.2) + offsets[st->analysis.boost_band[0]]+=2; + if (st->analysis.boost_amount[0]>.4) + offsets[st->analysis.boost_band[0]]+=2; + if (st->analysis.boost_amount[1]>.2) + offsets[st->analysis.boost_band[1]]+=2; + if (st->analysis.boost_amount[1]>.4) + offsets[st->analysis.boost_band[1]]+=2; } +#endif dynalloc_logp = 6; total_bits<<=BITRES; total_boost = 0; @@ -1412,7 +1409,7 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f if (tell+(6<mode, X, bandLogE, - st->end, LM, C, N, st->tonality_slope/16384.); + st->end, LM, C, N, &st->analysis); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1455,37 +1452,43 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f /* The target rate in 8th bits per frame */ opus_int32 target, new_target; opus_int32 min_allowed; + int coded_bins; + int coded_bands; int lm_diff = st->mode->maxLM - LM; + coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands; + coded_bins = st->mode->eBands[coded_bands]<mode->eBands[IMIN(intensity, coded_bands)]<constrained_vbr) target += (st->vbr_offset>>lm_diff); +#ifndef FIXED_POINT + if (st->analysis.valid && st->analysis.activity<.4) + target -= (coded_bins<analysis.activity); +#endif + #ifdef FIXED_POINT new_target = SHL32(MULT16_32_Q15(target, SUB16(tf_estimate, QCONST16(0.05, 14))),1); #else new_target = target*(tf_estimate-.05); #endif - if (1) { + +#ifndef FIXED_POINT + if (st->analysis.valid) { int tonal_target; float tonal; - int coded_bins; - int coded_bands; - tonal = st->frame_tonality/32768.; + tonal = st->analysis.tonality; tonal -= .06; - coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands; - //coded_bands = IMIN(coded_bands, st->mode->nbEBands-1); - coded_bins = st->mode->eBands[coded_bands]<mode->eBands[IMIN(intensity, coded_bands)]<signalling = value; } break; - case CELT_SET_TONALITY_REQUEST: + case CELT_SET_ANALYSIS_REQUEST: { - opus_int32 value = va_arg(ap, opus_int32); - st->frame_tonality = value; - } - break; - case CELT_SET_TONALITY_SLOPE_REQUEST: - { - opus_int32 value = va_arg(ap, opus_int32); - st->tonality_slope = value; + AnalysisInfo *info = va_arg(ap, AnalysisInfo *); + if (info) + OPUS_COPY(&st->analysis, info, 1); } break; case CELT_GET_MODE_REQUEST: diff --git a/celt/celt.h b/celt/celt.h index 553670c9..54bca447 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -50,7 +50,18 @@ extern "C" { #define CELTDecoder OpusCustomDecoder #define CELTMode OpusCustomMode -#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) +typedef struct { + int valid; + opus_val16 tonality; + opus_val16 tonality_slope; + opus_val16 activity; + int boost_band[2]; + opus_val16 boost_amount[2]; +}AnalysisInfo; + +#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) + +#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) /* Encoder/decoder Requests */ @@ -81,7 +92,7 @@ extern "C" { #define CELT_GET_MODE_REQUEST 10015 /** Get the CELTMode used by an encoder or decoder */ -#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x) +#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x) #define CELT_SET_SIGNALLING_REQUEST 10016 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) @@ -91,6 +102,9 @@ extern "C" { #define CELT_SET_TONALITY_SLOPE_REQUEST 10020 #define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) +#define CELT_SET_ANALYSIS_REQUEST 10022 +#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) + /* Encoder stuff */ diff --git a/src/analysis.c b/src/analysis.c index 21a4a106..49517f61 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -50,35 +50,33 @@ typedef struct { float prev_band_tonality[NB_TBANDS]; float prev_tonality; float E[NB_FRAMES][NB_TBANDS]; + float lowE[NB_TBANDS], highE[NB_TBANDS]; int E_count; + int count; } TonalityAnalysisState; -int boost_band[2]; -float boost_amount[2]; - -float tonality_analysis(TonalityAnalysisState *tonal, CELTEncoder *celt_enc, const opus_val16 *x, int C, float *tslope) +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C) { int i, b; const CELTMode *mode; const kiss_fft_state *kfft; kiss_fft_cpx in[480], out[480]; - const opus_val16 *window; - int overlap = 240; int N = 480, N2=240; float * restrict A = tonal->angle; float * restrict dA = tonal->d_angle; float * restrict d2A = tonal->d2_angle; float tonality[240]; + float noisiness[240]; float band_tonality[NB_TBANDS]; float frame_tonality; + float frame_noisiness; const float pi4 = M_PI*M_PI*M_PI*M_PI; float slope=0; - float max_tonality=-1; - int max_band=0; + float frame_stationarity; + float relativeE; celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); kfft = mode->mdct.kfft[0]; - window = mode->window; if (C==1) { for (i=0;iactivity = 0; + frame_noisiness = 0; + frame_stationarity = 0; + if (!tonal->count) + { + for (b=0;blowE[b] = 1e10; + tonal->highE[b] = -1e10; + } + } + relativeE = 0; + info->boost_amount[0]=info->boost_amount[1]=0; + info->boost_band[0]=info->boost_band[1]=0; for (b=0;bE[tonal->E_count][b] = E; + frame_noisiness += nE/(1e-15+E); + + logE = log(E+EPSILON); + tonal->lowE[b] = MIN32(logE, tonal->lowE[b]+.01); + tonal->highE[b] = MAX32(logE, tonal->highE[b]-.1); + if (tonal->highE[b] < tonal->lowE[b]+1) + { + tonal->highE[b]+=.5; + tonal->lowE[b]-=.5; + } + relativeE += (logE-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); + L1=L2=0; for (i=0;iprev_band_tonality[b]); - //if (band_tonality[b]>1) - // printf("%f %f %f\n", L1, L2, stationarity); - //fprintf(stdout, "%f ", band_tonality[b]); + frame_stationarity += stationarity; + /*band_tonality[b] = tE/(1e-15+E)*/; + band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); if (b>=7) frame_tonality += band_tonality[b]; slope += band_tonality[b]*(b-8); - if (band_tonality[b] > boost_amount[1] && b>=7 && b < NB_TBANDS-1) + if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) { - if (band_tonality[b] > boost_amount[0]) + if (band_tonality[b] > info->boost_amount[0]) { - boost_amount[1] = boost_amount[0]; - boost_band[1] = boost_band[0]; - boost_amount[0] = band_tonality[b]; - boost_band[0] = b; + info->boost_amount[1] = info->boost_amount[0]; + info->boost_band[1] = info->boost_band[0]; + info->boost_amount[0] = band_tonality[b]; + info->boost_band[0] = b; } else { - boost_amount[1] = band_tonality[b]; - boost_band[1] = b; + info->boost_amount[1] = band_tonality[b]; + info->boost_band[1] = b; } } tonal->prev_band_tonality[b] = band_tonality[b]; } + frame_stationarity /= NB_TBANDS; + relativeE /= NB_TBANDS; + if (tonal->count<10) + relativeE = .5; + frame_noisiness /= NB_TBANDS; +#if 1 + info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; +#else + info->activity = .5*(1+frame_noisiness-frame_stationarity); +#endif frame_tonality /= NB_TBANDS-7; frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); - //fprintf(stdout, "%f\n", frame_tonality); tonal->prev_tonality = frame_tonality; - boost_amount[0] -= frame_tonality+.2; - boost_amount[1] -= frame_tonality+.2; - if (band_tonality[boost_band[0]] < band_tonality[boost_band[0]+1]+.15 - || band_tonality[boost_band[0]] < band_tonality[boost_band[0]-1]+.15) - boost_amount[0]=0; - if (band_tonality[boost_band[1]] < band_tonality[boost_band[1]+1]+.15 - || band_tonality[boost_band[1]] < band_tonality[boost_band[1]-1]+.15) - boost_amount[1]=0; - - //boost_band = 16; - //boost_amount = .6; - //printf("%d %f %f\n", max_band, max_tonality, frame_tonality); + info->boost_amount[0] -= frame_tonality+.2; + info->boost_amount[1] -= frame_tonality+.2; + if (band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15 + || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15) + info->boost_amount[0]=0; + if (band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15 + || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15) + info->boost_amount[1]=0; + slope /= 8*8; - *tslope = slope; - //fprintf(stdout, "%f %f\n", frame_tonality, slope); + info->tonality_slope = slope; tonal->E_count = (tonal->E_count+1)%NB_FRAMES; - return frame_tonality; + tonal->count++; + info->tonality = frame_tonality; + info->valid = 1; } diff --git a/src/opus_encoder.c b/src/opus_encoder.c index a1619483..e08bcdb2 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -85,7 +85,9 @@ struct OpusEncoder { /* Sampling rate (at the API level) */ int first; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; - +#ifndef FIXED_POINT + TonalityAnalysisState analysis; +#endif opus_uint32 rangeFinal; }; @@ -102,7 +104,7 @@ static const opus_int32 mono_music_bandwidth_thresholds[8] = { 14000, 1000, /* MB not allowed */ 18000, 2000, /* MB<->WB */ 24000, 2000, /* WB<->SWB */ - 31000, 2000, /* SWB<->FB */ + 33000, 2000, /* SWB<->FB */ }; static const opus_int32 stereo_voice_bandwidth_thresholds[8] = { 11000, 1000, /* NB<->MB */ @@ -471,6 +473,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s int curr_bandwidth; opus_int32 max_data_bytes; int extra_buffer, total_buffer; + int perform_analysis=0; +#ifndef FIXED_POINT + AnalysisInfo analysis_info; +#endif VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -492,13 +498,18 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s silk_enc = (char*)st+st->silk_enc_offset; celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); +#ifndef FIXED_POINT + perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000; +#endif if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else delay_compensation = st->delay_compensation; - if (1) + if (perform_analysis) { - total_buffer = IMAX(240, delay_compensation); + total_buffer = IMAX(st->Fs/200, delay_compensation); + } else { + total_buffer = delay_compensation; } extra_buffer = total_buffer-delay_compensation; st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); @@ -849,13 +860,17 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s pcm_buf[total_buffer*st->channels + i] = pcm[i]; } - static TonalityAnalysisState tonal; - float tonality; - float tonality_slope; - tonality_analysis(&tonal, celt_enc, pcm_buf, st->channels, &tonality_slope); - tonality = tonality_analysis(&tonal, celt_enc, pcm_buf+(st->Fs/100)*st->channels, st->channels, &tonality_slope); - celt_encoder_ctl(celt_enc, CELT_SET_TONALITY(32768*tonality)); - celt_encoder_ctl(celt_enc, CELT_SET_TONALITY_SLOPE(16384*tonality_slope)); +#ifndef FIXED_POINT + if (perform_analysis) + { + int nb_analysis_frames; + nb_analysis_frames = frame_size/(st->Fs/100); + for (i=0;ianalysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels); + } else { + analysis_info.valid = 0; + } +#endif /* SILK processing */ if (st->mode != MODE_CELT_ONLY) @@ -1171,6 +1186,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s /* If false, we already busted the budget and we'll end up with a "PLC packet" */ if (ec_tell(&enc) <= 8*nb_compr_bytes) { +#ifndef FIXED_POINT + if (perform_analysis) + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); +#endif ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc); if (ret < 0) { -- cgit v1.2.1