summary refs log tree commit diff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-15 16:29:29 -0400
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-15 16:29:29 -0400
commit bec02d3d7ee51b394e9cd3c841fbb67d6ba2bba1 (patch)
tree d3b161574fa5489f81fd348e878d84c4402e4426
parent e6704abcad486c9966e79fda3663074a36870c0f (diff)
download opus-exp_rnn1.tar.gz
Using lookahead (when possible) to make the mode decision on the first frame [exp_rnn1]
-rw-r--r-- src/analysis.c 4
1 files changed, 2 insertions, 2 deletions
diff --git a/src/analysis.c b/src/analysis.c
index a32f63b6..1d6dd829 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -313,7 +313,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
The last step is that we need to consider whether we want to switch at all.
For that we use the average of the music probability over the entire window.
- If the threshold is higher than higher than that average we're not going to
+ If the threshold is higher than that average we're not going to
switch, so we compute a min with the average as well. The result of all these
min operations is music_prob_min, which gives the threshold for switching to music
if we're currently encoding for speech.
@@ -321,7 +321,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
We do the exact opposite to compute music_prob_max which is used for switching
from music to speech.
*/
- info_out->music_prob = tonal->info[mpos].music_prob;
prob_min = 1.f;
prob_max = 0.f;
vad_prob = tonal->info[vpos].activity_probability;
@@ -346,6 +345,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
prob_count += MAX16(.1f, pos_vad);
prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
}
+ info_out->music_prob = prob_avg/prob_count;
prob_min = MIN16(prob_avg/prob_count, prob_min);
prob_max = MAX16(prob_avg/prob_count, prob_max);
prob_min = MAX16(prob_min, 0.f);