diff options
Diffstat (limited to 'chromium/third_party/webrtc/modules/audio_processing/aec3')
14 files changed, 203 insertions, 173 deletions
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc index 9ff3c09b333..40f64fd1047 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc @@ -365,8 +365,9 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) { filter.Adapt(render_buffer, G); aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(filter.FilterFrequencyResponse(), - filter.FilterImpulseResponse(), rtc::Optional<size_t>(), - render_buffer, E2_main, Y2, x[0], s, false); + filter.FilterImpulseResponse(), true, + rtc::Optional<size_t>(), render_buffer, E2_main, Y2, + x[0], s, false); } // Verify that the filter is able to perform well. EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f), diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h index 2442f909e57..031e9b14769 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h @@ -39,7 +39,7 @@ constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1; constexpr size_t kFftLength = 2 * kFftLengthBy2; constexpr int kAdaptiveFilterLength = 12; -constexpr int kResidualEchoPowerRenderWindowSize = 30; +constexpr int kUnknownDelayRenderWindowSize = 30; constexpr int kAdaptiveFilterTimeDomainLength = kAdaptiveFilterLength * kFftLengthBy2; @@ -69,6 +69,8 @@ constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2; static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks, "Requirement to ensure buffer overflow detection"); +constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; + // TODO(peah): Integrate this with how it is done inside audio_processing_impl. constexpr size_t NumBandsForRate(int sample_rate_hz) { return static_cast<size_t>(sample_rate_hz == 8000 ? 1 diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc index 6ea54fcf3d2..14b83e10be5 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc @@ -23,54 +23,29 @@ namespace webrtc { namespace { // Computes delay of the adaptive filter. -rtc::Optional<size_t> EstimateFilterDelay( +int EstimateFilterDelay( const std::vector<std::array<float, kFftLengthBy2Plus1>>& adaptive_filter_frequency_response) { const auto& H2 = adaptive_filter_frequency_response; - - size_t reliable_delays_sum = 0; - size_t num_reliable_delays = 0; - constexpr size_t kUpperBin = kFftLengthBy2 - 5; - constexpr float kMinPeakMargin = 10.f; - const size_t kTailPartition = H2.size() - 1; + RTC_DCHECK_GE(kAdaptiveFilterLength, H2.size()); + std::array<int, kAdaptiveFilterLength> delays; + delays.fill(0); for (size_t k = 1; k < kUpperBin; ++k) { // Find the maximum of H2[j]. - int peak = 0; + size_t peak = 0; for (size_t j = 0; j < H2.size(); ++j) { if (H2[j][k] > H2[peak][k]) { peak = j; } } - - // Count the peak as a delay only if the peak is sufficiently larger than - // the tail. - if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) { - reliable_delays_sum += peak; - ++num_reliable_delays; - } - } - - // Return no delay if not sufficient delays have been found. - if (num_reliable_delays < 21) { - return rtc::Optional<size_t>(); + ++delays[peak]; } - const size_t delay = reliable_delays_sum / num_reliable_delays; - // Sanity check that the peak is not caused by a false strong DC-component in - // the filter. - for (size_t k = 1; k < kUpperBin; ++k) { - if (H2[delay][k] > H2[delay][0]) { - RTC_DCHECK_GT(H2.size(), delay); - return rtc::Optional<size_t>(delay); - } - } - return rtc::Optional<size_t>(); + return std::distance(delays.begin(), + std::max_element(delays.begin(), delays.end())); } -constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5; -constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond; - } // namespace int AecState::instance_count_ = 0; @@ -81,7 +56,6 @@ AecState::AecState(const AudioProcessing::Config::EchoCanceller3& config) erle_estimator_(config.param.erle.min, config.param.erle.max_l, config.param.erle.max_h), - echo_path_change_counter_(kEchoPathChangeCounterInitial), config_(config), reverb_decay_(config_.param.ep_strength.default_len) {} @@ -102,10 +76,10 @@ void AecState::HandleEchoPathChange( blocks_with_filter_adaptation_ = 0; render_received_ = false; force_zero_gain_ = true; - echo_path_change_counter_ = kEchoPathChangeCounterMax; + capture_block_counter_ = 0; } if (echo_path_variability.gain_change) { - echo_path_change_counter_ = kEchoPathChangeCounterInitial; + capture_block_counter_ = kNumBlocksPerSecond; } } } @@ -114,6 +88,7 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& adaptive_filter_frequency_response, const std::array<float, kAdaptiveFilterTimeDomainLength>& adaptive_filter_impulse_response, + bool converged_filter, const rtc::Optional<size_t>& external_delay_samples, const RenderBuffer& render_buffer, const std::array<float, kFftLengthBy2Plus1>& E2_main, @@ -121,77 +96,79 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>& rtc::ArrayView<const float> x, const std::array<float, kBlockSize>& s, bool echo_leakage_detected) { - // Update the echo audibility evaluator. - echo_audibility_.Update(x, s); - // Store input parameters. echo_leakage_detected_ = echo_leakage_detected; // Update counters. - const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); - - const bool active_render_block = - x_energy > (config_.param.render_levels.active_render_limit * - config_.param.render_levels.active_render_limit) * - kFftLengthBy2; - if (active_render_block) { - render_received_ = true; - } - blocks_with_filter_adaptation_ += - (active_render_block && (!SaturatedCapture()) ? 1 : 0); - --echo_path_change_counter_; + ++capture_block_counter_; // Force zero echo suppression gain after an echo path change to allow at // least some render data to be collected in order to avoid an initial echo // burst. - constexpr size_t kZeroGainBlocksAfterChange = kNumBlocksPerSecond / 5; - force_zero_gain_ = (++force_zero_gain_counter_) < kZeroGainBlocksAfterChange; + force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5; // Estimate delays. - filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response); + filter_delay_ = rtc::Optional<size_t>( + EstimateFilterDelay(adaptive_filter_frequency_response)); external_delay_ = external_delay_samples ? rtc::Optional<size_t>(*external_delay_samples / kBlockSize) : rtc::Optional<size_t>(); // Update the ERL and ERLE measures. - if (filter_delay_ && echo_path_change_counter_ <= 0) { + if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) { const auto& X2 = render_buffer.Spectrum(*filter_delay_); erle_estimator_.Update(X2, Y2, E2_main); erl_estimator_.Update(X2, Y2); } + // Update the echo audibility evaluator. + echo_audibility_.Update(x, s, converged_filter); + // Detect and flag echo saturation. // TODO(peah): Add the delay in this computation to ensure that the render and // capture signals are properly aligned. RTC_DCHECK_LT(0, x.size()); const float max_sample = fabs(*std::max_element( x.begin(), x.end(), [](float a, float b) { return a * a < b * b; })); - const bool saturated_echo = - previous_max_sample_ * 100 > 1600 && SaturatedCapture(); - previous_max_sample_ = max_sample; - // Counts the blocks since saturation. - constexpr size_t kSaturationLeakageBlocks = 20; - blocks_since_last_saturation_ = - saturated_echo ? 0 : blocks_since_last_saturation_ + 1; - echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; + if (config_.param.ep_strength.echo_can_saturate) { + const bool saturated_echo = + (previous_max_sample_ > 200.f) && SaturatedCapture(); + + // Counts the blocks since saturation. + constexpr size_t kSaturationLeakageBlocks = 20; + blocks_since_last_saturation_ = + saturated_echo ? 0 : blocks_since_last_saturation_ + 1; + + echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks; + } else { + echo_saturation_ = false; + } + previous_max_sample_ = max_sample; // Flag whether the linear filter estimate is usable. - constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond; usable_linear_estimate_ = - (!echo_saturation_) && - (!render_received_ || - blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) && - filter_delay_ && echo_path_change_counter_ <= 0 && external_delay_; + (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) && + capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_; // After an amount of active render samples for which an echo should have been // detected in the capture signal if the ERL was not infinite, flag that a - // headset is used. - constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond; - headset_detected_ = !external_delay_ && !filter_delay_ && + // transparent mode should be entered. + const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f); + const bool active_render_block = + x_energy > (config_.param.render_levels.active_render_limit * + config_.param.render_levels.active_render_limit) * + kFftLengthBy2; + if (active_render_block) { + render_received_ = true; + } + blocks_with_filter_adaptation_ += + (active_render_block && (!SaturatedCapture()) ? 1 : 0); + + transparent_mode_ = !converged_filter && (!render_received_ || blocks_with_filter_adaptation_ >= - kHeadSetDetectionBlocks); + 5 * kNumBlocksPerSecond); // Update the room reverb estimate. UpdateReverb(adaptive_filter_impulse_response); @@ -276,7 +253,8 @@ void AecState::UpdateReverb( } void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, - const std::array<float, kBlockSize>& s) { + const std::array<float, kBlockSize>& s, + bool converged_filter) { auto result_x = std::minmax_element(x.begin(), x.end()); auto result_s = std::minmax_element(s.begin(), s.end()); const float x_abs = @@ -284,10 +262,18 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, const float s_abs = std::max(std::abs(*result_s.first), std::abs(*result_s.second)); - if (x_abs < 5.f) { - ++low_farend_counter_; + if (converged_filter) { + if (x_abs < 20.f) { + ++low_farend_counter_; + } else { + low_farend_counter_ = 0; + } } else { - low_farend_counter_ = 0; + if (x_abs < 100.f) { + ++low_farend_counter_; + } else { + low_farend_counter_ = 0; + } } // The echo is deemed as not audible if the echo estimate is on the level of @@ -296,7 +282,8 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x, // any residual echo that is below the quantization noise level. Furthermore, // cases where the render signal is very close to zero are also identified as // not producing audible echo. - inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f; + inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) || + (!converged_filter && x_abs < 500); inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20; } diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h index 53899e55d14..358c74d8e0b 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h @@ -72,8 +72,8 @@ class AecState { capture_signal_saturation_ = capture_signal_saturation; } - // Returns whether a probable headset setup has been detected. - bool HeadsetDetected() const { return headset_detected_; } + // Returns whether the transparent mode is active + bool TransparentMode() const { return transparent_mode_; } // Takes appropriate action at an echo path change. void HandleEchoPathChange(const EchoPathVariability& echo_path_variability); @@ -92,10 +92,20 @@ class AecState { echo_audibility_.UpdateWithOutput(e); } + // Returns whether the linear filter should have been able to adapt properly. + bool SufficientFilterUpdates() const { + return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks; + } + // Returns whether the echo subtractor can be used to determine the residual // echo. bool LinearEchoEstimate() const { - return UsableLinearEstimate() && !HeadsetDetected(); + return UsableLinearEstimate() && !TransparentMode(); + } + + // Returns whether the AEC is in an initial state. + bool InitialState() const { + return capture_block_counter_ < 3 * kNumBlocksPerSecond; } // Updates the aec state. @@ -103,6 +113,7 @@ class AecState { adaptive_filter_frequency_response, const std::array<float, kAdaptiveFilterTimeDomainLength>& adaptive_filter_impulse_response, + bool converged_filter, const rtc::Optional<size_t>& external_delay_samples, const RenderBuffer& render_buffer, const std::array<float, kFftLengthBy2Plus1>& E2_main, @@ -115,7 +126,8 @@ class AecState { class EchoAudibility { public: void Update(rtc::ArrayView<const float> x, - const std::array<float, kBlockSize>& s); + const std::array<float, kBlockSize>& s, + bool converged_filter); void UpdateWithOutput(rtc::ArrayView<const float> e); bool InaudibleEcho() const { return inaudible_echo_; } @@ -133,13 +145,13 @@ class AecState { std::unique_ptr<ApmDataDumper> data_dumper_; ErlEstimator erl_estimator_; ErleEstimator erle_estimator_; - int echo_path_change_counter_; + size_t capture_block_counter_ = 0; size_t blocks_with_filter_adaptation_ = 0; bool usable_linear_estimate_ = false; bool echo_leakage_detected_ = false; bool capture_signal_saturation_ = false; bool echo_saturation_ = false; - bool headset_detected_ = false; + bool transparent_mode_ = false; float previous_max_sample_ = 0.f; bool force_zero_gain_ = false; bool render_received_ = false; diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc index 8413413ce2f..4956456a8c0 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc @@ -43,7 +43,7 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability is false when the filter is diverged and // there is no external delay reported. - state.Update(diverged_filter_frequency_response, impulse_response, + state.Update(diverged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s, false); EXPECT_FALSE(state.UsableLinearEstimate()); @@ -51,7 +51,7 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability is true when the filter is converged std::fill(x[0].begin(), x[0].end(), 101.f); for (int k = 0; k < 3000; ++k) { - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); } @@ -60,7 +60,7 @@ TEST(AecState, NormalUsage) { // Verify that linear AEC usability becomes false after an echo path change is // reported state.HandleEchoPathChange(EchoPathVariability(true, false)); - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); EXPECT_FALSE(state.UsableLinearEstimate()); @@ -68,25 +68,25 @@ TEST(AecState, NormalUsage) { // Verify that the active render detection works as intended. std::fill(x[0].begin(), x[0].end(), 101.f); state.HandleEchoPathChange(EchoPathVariability(true, true)); - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); EXPECT_FALSE(state.ActiveRender()); for (int k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); } EXPECT_TRUE(state.ActiveRender()); // Verify that echo leakage is properly reported. - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); EXPECT_FALSE(state.EchoLeakageDetected()); - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, true); EXPECT_TRUE(state.EchoLeakageDetected()); @@ -103,7 +103,7 @@ TEST(AecState, NormalUsage) { Y2.fill(10.f * 10000.f * 10000.f); for (size_t k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); } @@ -120,7 +120,7 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(10.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); } @@ -141,7 +141,7 @@ TEST(AecState, NormalUsage) { E2_main.fill(1.f * 10000.f * 10000.f); Y2.fill(5.f * E2_main[0]); for (size_t k = 0; k < 1000; ++k) { - state.Update(converged_filter_frequency_response, impulse_response, + state.Update(converged_filter_frequency_response, impulse_response, true, rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s, false); } @@ -161,34 +161,6 @@ TEST(AecState, NormalUsage) { } } -// Verifies the a non-significant delay is correctly identified. -TEST(AecState, NonSignificantDelay) { - AecState state(AudioProcessing::Config::EchoCanceller3{}); - RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, - std::vector<size_t>(1, 30)); - std::array<float, kFftLengthBy2Plus1> E2_main; - std::array<float, kFftLengthBy2Plus1> Y2; - std::array<float, kBlockSize> x; - EchoPathVariability echo_path_variability(false, false); - std::array<float, kBlockSize> s; - s.fill(100.f); - x.fill(0.f); - - std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30); - for (auto& v : frequency_response) { - v.fill(0.01f); - } - - std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response; - impulse_response.fill(0.f); - - // Verify that a non-significant filter delay is identified correctly. - state.HandleEchoPathChange(echo_path_variability); - state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(), - render_buffer, E2_main, Y2, x, s, false); - EXPECT_FALSE(state.FilterDelay()); -} - // Verifies the delay for a converged filter is correctly identified. TEST(AecState, ConvergedFilterDelay) { constexpr int kFilterLength = 10; @@ -217,8 +189,9 @@ TEST(AecState, ConvergedFilterDelay) { frequency_response[k].fill(100.f); frequency_response[k][0] = 0.f; state.HandleEchoPathChange(echo_path_variability); - state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(), - render_buffer, E2_main, Y2, x, s, false); + state.Update(frequency_response, impulse_response, true, + rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s, + false); EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay()); if (k != (kFilterLength - 1)) { EXPECT_EQ(k, state.FilterDelay()); @@ -241,7 +214,8 @@ TEST(AecState, ExternalDelay) { x.fill(0.f); RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30, std::vector<size_t>(1, 30)); - std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30); + std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response( + kAdaptiveFilterLength); for (auto& v : frequency_response) { v.fill(0.01f); } @@ -251,7 +225,7 @@ TEST(AecState, ExternalDelay) { for (size_t k = 0; k < frequency_response.size() - 1; ++k) { state.HandleEchoPathChange(EchoPathVariability(false, false)); - state.Update(frequency_response, impulse_response, + state.Update(frequency_response, impulse_response, true, rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer, E2_main, Y2, x, s, false); EXPECT_TRUE(state.ExternalDelay()); @@ -261,8 +235,9 @@ TEST(AecState, ExternalDelay) { // Verify that the externally reported delay is properly unset when it is no // longer present. state.HandleEchoPathChange(EchoPathVariability(false, false)); - state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(), - render_buffer, E2_main, Y2, x, s, false); + state.Update(frequency_response, impulse_response, true, + rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s, + false); EXPECT_FALSE(state.ExternalDelay()); } diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc index cb7e05bbce2..341c89a6e46 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc @@ -172,11 +172,12 @@ void EchoRemoverImpl::ProcessCapture( // Update the AEC state information. aec_state_.Update(subtractor_.FilterFrequencyResponse(), subtractor_.FilterImpulseResponse(), - echo_path_delay_samples, render_buffer, E2_main, Y2, x0, - subtractor_output.s_main, echo_leakage_detected_); + subtractor_.ConvergedFilter(), echo_path_delay_samples, + render_buffer, E2_main, Y2, x0, subtractor_output.s_main, + echo_leakage_detected_); // Choose the linear output. - output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0); + output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0); data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0], LowestBandRate(sample_rate_hz_), 1); data_dumper_->DumpRaw("aec3_output_linear", y0); diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc index 6d0423faaf2..e3f968ce771 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc @@ -135,7 +135,7 @@ void RunFilterUpdateTest(int num_blocks_to_process, // Update the delay. aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(main_filter.FilterFrequencyResponse(), - main_filter.FilterImpulseResponse(), + main_filter.FilterImpulseResponse(), true, rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s, false); } diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc index 785b837ddb6..e173aa14d94 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc @@ -106,7 +106,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands) fft_buffer_( optimization_, num_bands, - std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength), + std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength), std::vector<size_t>(1, kAdaptiveFilterLength)), api_call_jitter_buffer_(num_bands), zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)) { diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc index c5b0161e8ca..04251a4cca2 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc @@ -74,9 +74,6 @@ void RenderNoisePower( } } -// Assume a minimum echo path gain of -33 dB for headsets. -constexpr float kHeadsetEchoPathGain = 0.0005f; - } // namespace ResidualEchoEstimator::ResidualEchoEstimator( @@ -95,24 +92,29 @@ void ResidualEchoEstimator::Estimate( std::array<float, kFftLengthBy2Plus1>* R2) { RTC_DCHECK(R2); - const rtc::Optional<size_t> delay = - aec_state.ExternalDelay() - ? (aec_state.FilterDelay() ? aec_state.FilterDelay() - : aec_state.ExternalDelay()) - : rtc::Optional<size_t>(); - // Estimate the power of the stationary noise in the render signal. RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_); // Estimate the residual echo power. - if (aec_state.LinearEchoEstimate()) { RTC_DCHECK(aec_state.FilterDelay()); const int filter_delay = *aec_state.FilterDelay(); LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2); AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay, aec_state.ReverbDecay(), R2); + + // If the echo is saturated, estimate the echo power as the maximum echo + // power with a leakage factor. + if (aec_state.SaturatedEcho()) { + R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); + } } else { + const rtc::Optional<size_t> delay = + aec_state.ExternalDelay() + ? (aec_state.FilterDelay() ? aec_state.FilterDelay() + : aec_state.ExternalDelay()) + : rtc::Optional<size_t>(); + // Estimate the echo generating signal power. std::array<float, kFftLengthBy2Plus1> X2; if (aec_state.ExternalDelay() && aec_state.FilterDelay()) { @@ -120,14 +122,17 @@ void ResidualEchoEstimator::Estimate( const int delay_use = static_cast<int>(*delay); // Computes the spectral power over the blocks surrounding the delay. - RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize); + constexpr int kKnownDelayRenderWindowSize = 5; + static_assert( + kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize, + "Requirement to ensure that the render buffer is overrun"); EchoGeneratingPower( render_buffer, std::max(0, delay_use - 1), - std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2); + std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2); } else { // Computes the spectral power over the latest blocks. - EchoGeneratingPower(render_buffer, 0, - kResidualEchoPowerRenderWindowSize - 1, &X2); + EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1, + &X2); } // Subtract the stationary noise power to avoid stationary noise causing @@ -136,23 +141,25 @@ void ResidualEchoEstimator::Estimate( X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(), [](float a, float b) { return std::max(0.f, a - 10.f * b); }); - NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2); - AddEchoReverb(*R2, aec_state.SaturatedEcho(), - std::min(static_cast<size_t>(kAdaptiveFilterLength), - delay.value_or(kAdaptiveFilterLength)), - aec_state.ReverbDecay(), R2); + NonLinearEstimate( + aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(), + config_.param.ep_strength.bounded_erl, aec_state.TransparentMode(), + aec_state.InitialState(), X2, Y2, R2); + + if (aec_state.ExternalDelay() && aec_state.FilterDelay() && + aec_state.SaturatedEcho()) { + AddEchoReverb(*R2, aec_state.SaturatedEcho(), + std::min(static_cast<size_t>(kAdaptiveFilterLength), + delay.value_or(kAdaptiveFilterLength)), + aec_state.ReverbDecay(), R2); + } } // If the echo is deemed inaudible, set the residual echo to zero. - if (aec_state.InaudibleEcho() && - (aec_state.ExternalDelay() || aec_state.HeadsetDetected())) { + if (aec_state.InaudibleEcho()) { R2->fill(0.f); - } - - // If the echo is saturated, estimate the echo power as the maximum echo power - // with a leakage factor. - if (aec_state.SaturatedEcho()) { - R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f); + R2_old_.fill(0.f); + R2_hold_counter_.fill(0.f); } std::copy(R2->begin(), R2->end(), R2_old_.begin()); @@ -183,17 +190,39 @@ void ResidualEchoEstimator::LinearEstimate( } void ResidualEchoEstimator::NonLinearEstimate( - bool headset_detected, + bool sufficient_filter_updates, + bool saturated_echo, + bool bounded_erl, + bool transparent_mode, + bool initial_state, const std::array<float, kFftLengthBy2Plus1>& X2, const std::array<float, kFftLengthBy2Plus1>& Y2, std::array<float, kFftLengthBy2Plus1>* R2) { - // Choose gains. - const float echo_path_gain_lf = - headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.lf; - const float echo_path_gain_mf = - headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.mf; - const float echo_path_gain_hf = - headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.hf; + float echo_path_gain_lf; + float echo_path_gain_mf; + float echo_path_gain_hf; + + // Set echo path gains. + if (saturated_echo) { + // If the echo could be saturated, use a very conservative gain. + echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f; + } else if (sufficient_filter_updates && !bounded_erl) { + // If the filter should have been able to converge, and no assumption is + // possible on the ERL, use a low gain. + echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f; + } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) { + // If the filter should have been able to converge, and and it is known that + // the ERL is bounded, use a very low gain. + echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f; + } else if (!initial_state) { + // If the AEC is no longer in an initial state, assume a weak echo path. + echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f; + } else { + // In the initial state, use conservative gains. + echo_path_gain_lf = config_.param.ep_strength.lf; + echo_path_gain_mf = config_.param.ep_strength.mf; + echo_path_gain_hf = config_.param.ep_strength.hf; + } // Compute preliminary residual echo. std::transform( diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h index d766f123a4f..d2eada3edf8 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h @@ -49,7 +49,11 @@ class ResidualEchoEstimator { // Estimates the residual echo power based on the estimate of the echo path // gain. - void NonLinearEstimate(bool headset_detected, + void NonLinearEstimate(bool sufficient_filter_updates, + bool saturated_echo, + bool bounded_erl, + bool transparent_mode, + bool initial_state, const std::array<float, kFftLengthBy2Plus1>& X2, const std::array<float, kFftLengthBy2Plus1>& Y2, std::array<float, kFftLengthBy2Plus1>* R2); diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc index 46b726d9967..a44a548e708 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc @@ -83,8 +83,8 @@ TEST(ResidualEchoEstimator, BasicTest) { render_buffer.Insert(x); aec_state.HandleEchoPathChange(echo_path_variability); - aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main, - Y2, x[0], s, false); + aec_state.Update(H2, h, true, rtc::Optional<size_t>(2), render_buffer, + E2_main, Y2, x[0], s, false); estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2); } diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc index c64e5a4ef9b..c1909f3b90b 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc @@ -11,6 +11,7 @@ #include "modules/audio_processing/aec3/subtractor.h" #include <algorithm> +#include <numeric> #include "api/array_view.h" #include "modules/audio_processing/logging/apm_data_dumper.h" @@ -63,6 +64,7 @@ void Subtractor::HandleEchoPathChange( shadow_filter_.HandleEchoPathChange(); G_main_.HandleEchoPathChange(); G_shadow_.HandleEchoPathChange(); + converged_filter_ = false; } } @@ -89,6 +91,19 @@ void Subtractor::Process(const RenderBuffer& render_buffer, shadow_filter_.Filter(render_buffer, &S); PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr); + if (!converged_filter_) { + const auto sum_of_squares = [](float a, float b) { return a + b * b; }; + const float e2_main = + std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares); + const float e2_shadow = + std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares); + const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares); + + if (y2 > kBlockSize * 50.f * 50.f) { + converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2); + } + } + // Compute spectra for future use. E_main.Spectrum(optimization_, &output->E2_main); E_shadow.Spectrum(optimization_, &output->E2_shadow); diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h index e76155494c0..680bf458066 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h @@ -57,6 +57,8 @@ class Subtractor { return main_filter_.FilterImpulseResponse(); } + bool ConvergedFilter() const { return converged_filter_; } + private: const Aec3Fft fft_; ApmDataDumper* data_dumper_; @@ -65,6 +67,7 @@ class Subtractor { AdaptiveFirFilter shadow_filter_; MainFilterUpdateGain G_main_; ShadowFilterUpdateGain G_shadow_; + bool converged_filter_ = false; RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor); }; diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc index ea28c4e6638..0450b6ccd49 100644 --- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc +++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc @@ -69,6 +69,7 @@ float RunSubtractorTest(int num_blocks_to_process, aec_state.HandleEchoPathChange(EchoPathVariability(false, false)); aec_state.Update(subtractor.FilterFrequencyResponse(), subtractor.FilterImpulseResponse(), + subtractor.ConvergedFilter(), rtc::Optional<size_t>(delay_samples / kBlockSize), render_buffer, E2_main, Y2, x[0], output.s_main, false); } |