diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2012-01-12 03:44:49 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2012-03-05 20:50:10 -0500 |
commit | 39c4cc688d0ab3c4074dbd986a46e0393dab6dff (patch) | |
tree | 093aa350476491bfeb182319456e9ec765529b8a | |
parent | e59889e48b947f45cebf95f62ae6e53e6a00362d (diff) | |
download | opus-39c4cc688d0ab3c4074dbd986a46e0393dab6dff.tar.gz |
Tonality and pitch tuning
Tuned the tonality estimator to trigger on signals where only part of the
spectrum is tonal. Also tuned the pitch detector not to be confused
by short-term correlation.
-rw-r--r-- | celt/celt.c | 12 | ||||
-rw-r--r-- | celt/pitch.c | 10 | ||||
-rw-r--r-- | src/analysis.c | 11 |
3 files changed, 21 insertions, 12 deletions
diff --git a/celt/celt.c b/celt/celt.c index 03256af5..af458738 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C, #ifdef FUZZING is_transient = rand()&0x1; #endif - /*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/ + /*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/ return is_transient; } @@ -1205,8 +1205,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC); + /* Don't search for the fir last 1.5 octave of the range because + there's too many false-positives due to short-term correlation */ pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index); + COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index); pitch_index = COMBFILTER_MAXPERIOD-pitch_index; gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, @@ -1615,11 +1617,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f if (st->analysis.valid) { int tonal_target; float tonal; - tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality); - tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal; + tonal = MAX16(0,st->analysis.tonality-.2); + tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal; if (pitch_change) tonal_target += (coded_bins<<BITRES)*.8; - /*printf("%f %d\n", tonal, tonal_target);*/ + /*printf("%f %f ", st->analysis.tonality, tonal);*/ new_target = IMAX(tonal_target,new_target); } #endif diff --git a/celt/pitch.c b/celt/pitch.c index b4079026..743528d0 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -307,6 +307,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int T1, T1b; opus_val16 g1; opus_val16 cont=0; + opus_val16 thresh; T1 = (2*T0+k)/(2*k); if (T1 < minperiod) break; @@ -348,7 +349,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, cont = HALF32(prev_gain); else cont = 0; - if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont) + thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont); + /* Bias against very high pitch (very short period) to avoid false-positives + due to short-term correlation */ + if (T1<3*minperiod) + thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont); + else if (T1<2*minperiod) + thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont); + if (g1 > thresh) { best_xy = xy; best_yy = yy; diff --git a/src/analysis.c b/src/analysis.c index 764b3703..1336628c 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = { .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 }; -#define NB_TONAL_SKIP_BANDS 0 +#define NB_TONAL_SKIP_BANDS 9 typedef struct { float angle[240]; @@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc frame_stationarity += stationarity; /*band_tonality[b] = tE/(1e-15+E)*/; band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); - //printf("%f ", band_tonality[b]); -#if 1 +#if 0 if (b>=NB_TONAL_SKIP_BANDS) { frame_tonality += tweight[b]*band_tonality[b]; @@ -277,7 +276,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; #endif - max_frame_tonality = MAX16(max_frame_tonality, frame_tonality); + max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality); slope += band_tonality[b]*(b-8); /*printf("%f %f ", band_tonality[b], stationarity);*/ if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) @@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc } tonal->prev_band_tonality[b] = band_tonality[b]; } - //printf("\n"); + frame_loudness = 20*log10(frame_loudness); tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); tonal->lowECount *= (1-alphaE); @@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc #else info->activity = .5*(1+frame_noisiness-frame_stationarity); #endif - frame_tonality = (max_frame_tonality/(tw_sum)); + frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); tonal->prev_tonality = frame_tonality; info->boost_amount[0] -= frame_tonality+.2; |