summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJean-Marc Valin <jmvalin@jmvalin.ca>2017-07-13 16:46:53 -0400
committerJean-Marc Valin <jmvalin@jmvalin.ca>2017-07-13 16:46:53 -0400
commit85d21e317322988ac4a0f100f6ae5514c5a1675a (patch)
treead79ee54fc32eec4519dcb93e90316702ee82927
parente271268a4c484e04cdc3615b53cb5794c9977d2b (diff)
downloadopus-85d21e317322988ac4a0f100f6ae5514c5a1675a.tar.gz
This seems to fix the decision
-rw-r--r--src/analysis.c51
1 files changed, 30 insertions, 21 deletions
diff --git a/src/analysis.c b/src/analysis.c
index 9077317e..d4ce483a 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -50,7 +50,7 @@
#ifndef DISABLE_FLOAT_API
-#define TRANSITION_PENALTY 5
+#define TRANSITION_PENALTY 10
static const float dct_table[128] = {
0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
@@ -241,6 +241,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
float prob_count;
float prob_min, prob_max;
float vad_prob;
+ int mpos, vpos;
pos = tonal->read_pos;
curr_lookahead = tonal->write_pos-tonal->read_pos;
@@ -276,44 +277,52 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
}
info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
- pos = pos0;
- /* If we have enough look-ahead, discard the first 2 frames to compensate for the
- delay in the features. */
+ mpos = vpos = pos0;
+ /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
+ ~1 frame delay in the VAD prob. */
if (curr_lookahead > 10)
{
- pos += 2;
- if (pos>=DETECT_SIZE)
- pos -= DETECT_SIZE;
+ mpos += 5;
+ if (mpos>=DETECT_SIZE)
+ mpos -= DETECT_SIZE;
+ vpos += 1;
+ if (vpos>=DETECT_SIZE)
+ vpos -= DETECT_SIZE;
}
- info_out->music_prob = tonal->info[pos].music_prob;
- prob_avg = tonal->info[pos].music_prob;
+ info_out->music_prob = tonal->info[mpos].music_prob;
prob_min = 1;
prob_max = 0;
- vad_prob = tonal->info[pos].activity_probability;
+ vad_prob = tonal->info[vpos].activity_probability;
prob_count = MAX16(.1, vad_prob);
+ prob_avg = MAX16(.1, vad_prob)*tonal->info[mpos].music_prob;
while (1)
{
float pos_vad;
- pos++;
- if (pos==DETECT_SIZE)
- pos = 0;
- if (pos == tonal->write_pos)
+ mpos++;
+ if (mpos==DETECT_SIZE)
+ mpos = 0;
+ if (mpos == tonal->write_pos)
+ break;
+ vpos++;
+ if (vpos==DETECT_SIZE)
+ vpos = 0;
+ if (vpos == tonal->write_pos)
break;
- pos_vad = tonal->info[pos].activity_probability;
+ pos_vad = tonal->info[vpos].activity_probability;
+ prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
+ prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
prob_count += MAX16(.1, pos_vad);
- prob_avg += (tonal->info[pos].music_prob-prob_avg)/prob_count;
- prob_min = MIN16(prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_min);
- prob_max = MAX16(prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad)/prob_count, prob_max);
+ prob_avg += MAX16(.1, pos_vad)*tonal->info[mpos].music_prob;
}
- prob_min = MIN16(prob_avg, prob_min);
- prob_max = MAX16(prob_avg, prob_max);
+ prob_min = MIN16(prob_avg/prob_count, prob_min);
+ prob_max = MAX16(prob_avg/prob_count, prob_max);
prob_min = MAX16(prob_min, 0);
prob_max = MIN16(prob_max, 1);
info_out->music_prob_min = prob_min;
info_out->music_prob_max = prob_max;
- /*printf("%f %f %f\n", prob_min, prob_max, prob_count);*/
+ /*printf("%f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob);*/
tonal->read_subframe += len/(tonal->Fs/400);
while (tonal->read_subframe>=8)
{