diff options
author | Felicia Lim <flim@google.com> | 2016-05-16 15:52:44 +0200 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2016-07-17 15:05:54 -0400 |
commit | 33f18a20dc1eb401e51369e97674f09f79943631 (patch) | |
tree | 567a641d65b6b6a53e5f976d5182512856fac095 /src/opus_encoder.c | |
parent | 36481346729962928b58672a86bef140b703787a (diff) | |
download | opus-33f18a20dc1eb401e51369e97674f09f79943631.tar.gz |
Turn on DTX only when the noise is sufficiently quiet
Computes a pseudo-SNR as the ratio of the running peak signal energy
to the instantaneous noise energy. DTX is turned on only if this ratio
is at least a predetermined threshold (25 dB, i.e. 10^(25/10) = 316.23).
Signed-off-by: Jean-Marc Valin <jmvalin@jmvalin.ca>
Diffstat (limited to 'src/opus_encoder.c')
-rw-r--r-- | src/opus_encoder.c | 55 |
1 files changed, 52 insertions, 3 deletions
diff --git a/src/opus_encoder.c b/src/opus_encoder.c index d62ea3f7..671e8d21 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -53,6 +53,10 @@ #define MAX_ENCODER_BUFFER 480 +#ifndef DISABLE_FLOAT_API +#define PSEUDO_SNR_THRESHOLD 316.23f /* 10^(25/10) */ +#endif + typedef struct { opus_val32 XX, XY, YY; opus_val16 smoothed_width; @@ -107,6 +111,7 @@ struct OpusEncoder { #ifndef DISABLE_FLOAT_API int detected_bandwidth; int nb_no_activity_frames; + opus_val32 peak_signal_energy; #endif opus_uint32 rangeFinal; }; @@ -1028,20 +1033,60 @@ static int is_digital_silence(const opus_val16* pcm, int frame_size, int lsb_dep return silence; } +static opus_val32 compute_frame_energy(const opus_val16 *pcm, int frame_size) +{ + int i; +#ifdef FIXED_POINT + opus_val32 sample_max; + int max_shift; + int shift; +#endif + opus_val32 energy = 0; + +#ifdef FIXED_POINT + /* Max amplitude in the signal */ + sample_max = celt_maxabs16(pcm, frame_size); + + /* Compute the right shift required in the MAC to avoid an overflow */ + max_shift = celt_ilog2(frame_size); + shift = IMAX(0, (celt_ilog2(sample_max) << 1) + max_shift - 28); +#endif + + /* Compute the energy */ + for (i=0; i<frame_size; i++) + energy += SHR32(MULT16_16(pcm[i], pcm[i]), shift); + + /* Normalize energy by the frame size and left-shift back to the original position */ + energy /= frame_size; + energy = SHL32(energy, shift); + + return energy; +} + /* Decides if DTX should be turned on (=1) or off (=0) */ static int decide_dtx_mode(float activity_probability, /* probability that current frame contains speech/music */ int *nb_no_activity_frames, /* number of consecutive frames with no activity */ + opus_val32 peak_signal_energy, /* peak energy of desired signal detected so far */ + const opus_val16 *pcm, /* input pcm signal */ + int frame_size, /* frame size */ int is_silence /* only digital silence detected in this frame */ ) { - int is_noise = 0; + int is_noise; + opus_val32 noise_energy; + int 
is_sufficiently_quiet; if (!is_silence) { is_noise = activity_probability < DTX_ACTIVITY_THRESHOLD; + if (is_noise) + { + noise_energy = compute_frame_energy(pcm, frame_size); + is_sufficiently_quiet = peak_signal_energy >= (PSEUDO_SNR_THRESHOLD * noise_energy); + } } - if (is_silence || is_noise) + if (is_silence || (is_noise && is_sufficiently_quiet)) { /* The number of consecutive DTX frames should be within the allowed bounds */ (*nb_no_activity_frames)++; @@ -1145,6 +1190,10 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ c1, c2, analysis_channels, st->Fs, lsb_depth, downmix, &analysis_info); } + + /* Track the peak signal energy */ + if (!is_silence && analysis_info.activity_probability > DTX_ACTIVITY_THRESHOLD) + st->peak_signal_energy = MAX32(MULT16_32_Q15(QCONST16(0.999, 15), st->peak_signal_energy), compute_frame_energy(pcm, frame_size)); } #else (void)analysis_pcm; @@ -2095,7 +2144,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifndef DISABLE_FLOAT_API if (st->use_dtx && (analysis_info.valid || is_silence)) { - if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, is_silence)) + if (decide_dtx_mode(analysis_info.activity_probability, &st->nb_no_activity_frames, st->peak_signal_energy, pcm, frame_size, is_silence)) { st->rangeFinal = 0; data[0] = gen_toc(st->mode, st->Fs/frame_size, curr_bandwidth, st->stream_channels); |