summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2012-01-12 03:44:49 -0500
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2012-03-05 20:50:10 -0500
commit: 39c4cc688d0ab3c4074dbd986a46e0393dab6dff (patch)
tree: 093aa350476491bfeb182319456e9ec765529b8a
parent: e59889e48b947f45cebf95f62ae6e53e6a00362d (diff)
download: opus-39c4cc688d0ab3c4074dbd986a46e0393dab6dff.tar.gz
Tonality and pitch tuning
Tuned the tonality estimator to trigger on signals where only part of the spectrum is tonal. Also tuned the pitch detector not to be confused by short-term correlation.
-rw-r--r-- celt/celt.c 12
-rw-r--r-- celt/pitch.c 10
-rw-r--r-- src/analysis.c 11
3 files changed, 21 insertions, 12 deletions
diff --git a/celt/celt.c b/celt/celt.c
index 03256af5..af458738 100644
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -442,7 +442,7 @@ static int transient_analysis(const opus_val32 * restrict in, int len, int C,
#ifdef FUZZING
is_transient = rand()&0x1;
#endif
- /*printf("%d %d %d %f %f\n", is_transient, *tf_estimate, tf_max, 0., 1.);*/
+ /*printf("%d %f %d %f %f ", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
return is_transient;
}
@@ -1205,8 +1205,10 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+ /* Don't search the last 1.5 octaves of the range because
there are too many false positives due to short-term correlation */
pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
- COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
+ COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@@ -1615,11 +1617,11 @@ int celt_encode_with_ec(CELTEncoder * restrict st, const opus_val16 * pcm, int f
if (st->analysis.valid) {
int tonal_target;
float tonal;
- tonal = MAX16(0,st->analysis.tonality-.2)*(.5+st->analysis.tonality);
- tonal_target = target + (coded_bins<<BITRES)*1.6f*tonal;
+ tonal = MAX16(0,st->analysis.tonality-.2);
+ tonal_target = new_target + (coded_bins<<BITRES)*2.0f*tonal;
if (pitch_change)
tonal_target += (coded_bins<<BITRES)*.8;
- /*printf("%f %d\n", tonal, tonal_target);*/
+ /*printf("%f %f ", st->analysis.tonality, tonal);*/
new_target = IMAX(tonal_target,new_target);
}
#endif
diff --git a/celt/pitch.c b/celt/pitch.c
index b4079026..743528d0 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -307,6 +307,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int T1, T1b;
opus_val16 g1;
opus_val16 cont=0;
+ opus_val16 thresh;
T1 = (2*T0+k)/(2*k);
if (T1 < minperiod)
break;
@@ -348,7 +349,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
cont = HALF32(prev_gain);
else
cont = 0;
- if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+ thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
+ /* Bias against very high pitch (very short period) to avoid false-positives
+ due to short-term correlation */
+ if (T1<3*minperiod)
+ thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
+ else if (T1<2*minperiod)
+ thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
+ if (g1 > thresh)
{
best_xy = xy;
best_yy = yy;
diff --git a/src/analysis.c b/src/analysis.c
index 764b3703..1336628c 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -74,7 +74,7 @@ static const float tweight[NB_TBANDS+1] = {
.3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
};
-#define NB_TONAL_SKIP_BANDS 0
+#define NB_TONAL_SKIP_BANDS 9
typedef struct {
float angle[240];
@@ -265,8 +265,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
frame_stationarity += stationarity;
/*band_tonality[b] = tE/(1e-15+E)*/;
band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
- //printf("%f ", band_tonality[b]);
-#if 1
+#if 0
if (b>=NB_TONAL_SKIP_BANDS)
{
frame_tonality += tweight[b]*band_tonality[b];
@@ -277,7 +276,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
#endif
- max_frame_tonality = MAX16(max_frame_tonality, frame_tonality);
+ max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
slope += band_tonality[b]*(b-8);
/*printf("%f %f ", band_tonality[b], stationarity);*/
if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
@@ -295,7 +294,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
}
tonal->prev_band_tonality[b] = band_tonality[b];
}
- //printf("\n");
+
frame_loudness = 20*log10(frame_loudness);
tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
tonal->lowECount *= (1-alphaE);
@@ -320,7 +319,7 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc
#else
info->activity = .5*(1+frame_noisiness-frame_stationarity);
#endif
- frame_tonality = (max_frame_tonality/(tw_sum));
+ frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
tonal->prev_tonality = frame_tonality;
info->boost_amount[0] -= frame_tonality+.2;