summary refs log tree commit diff
path: root/chromium/third_party/webrtc/modules/audio_processing
diff options
context:
space:
mode:
Diffstat (limited to 'chromium/third_party/webrtc/modules/audio_processing')
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc | 5
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h | 4
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc | 141
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h | 24
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc | 63
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc | 7
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc | 2
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc | 2
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc | 99
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h | 6
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc | 4
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc | 15
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h | 3
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc | 1
-rw-r--r-- chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h | 7
15 files changed, 207 insertions, 176 deletions
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
index 9ff3c09b333..40f64fd1047 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/adaptive_fir_filter_unittest.cc
@@ -365,8 +365,9 @@ TEST(AdaptiveFirFilter, FilterAndAdapt) {
filter.Adapt(render_buffer, G);
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(filter.FilterFrequencyResponse(),
- filter.FilterImpulseResponse(), rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x[0], s, false);
+ filter.FilterImpulseResponse(), true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2,
+ x[0], s, false);
}
// Verify that the filter is able to perform well.
EXPECT_LT(1000 * std::inner_product(e.begin(), e.end(), e.begin(), 0.f),
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h
index 2442f909e57..031e9b14769 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec3_common.h
@@ -39,7 +39,7 @@ constexpr size_t kFftLengthBy2Minus1 = kFftLengthBy2 - 1;
constexpr size_t kFftLength = 2 * kFftLengthBy2;
constexpr int kAdaptiveFilterLength = 12;
-constexpr int kResidualEchoPowerRenderWindowSize = 30;
+constexpr int kUnknownDelayRenderWindowSize = 30;
constexpr int kAdaptiveFilterTimeDomainLength =
kAdaptiveFilterLength * kFftLengthBy2;
@@ -69,6 +69,8 @@ constexpr size_t kRenderTransferQueueSize = kMaxApiCallsJitterBlocks / 2;
static_assert(2 * kRenderTransferQueueSize >= kMaxApiCallsJitterBlocks,
"Requirement to ensure buffer overflow detection");
+constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
+
// TODO(peah): Integrate this with how it is done inside audio_processing_impl.
constexpr size_t NumBandsForRate(int sample_rate_hz) {
return static_cast<size_t>(sample_rate_hz == 8000 ? 1
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc
index 6ea54fcf3d2..14b83e10be5 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.cc
@@ -23,54 +23,29 @@ namespace webrtc {
namespace {
// Computes delay of the adaptive filter.
-rtc::Optional<size_t> EstimateFilterDelay(
+int EstimateFilterDelay(
const std::vector<std::array<float, kFftLengthBy2Plus1>>&
adaptive_filter_frequency_response) {
const auto& H2 = adaptive_filter_frequency_response;
-
- size_t reliable_delays_sum = 0;
- size_t num_reliable_delays = 0;
-
constexpr size_t kUpperBin = kFftLengthBy2 - 5;
- constexpr float kMinPeakMargin = 10.f;
- const size_t kTailPartition = H2.size() - 1;
+ RTC_DCHECK_GE(kAdaptiveFilterLength, H2.size());
+ std::array<int, kAdaptiveFilterLength> delays;
+ delays.fill(0);
for (size_t k = 1; k < kUpperBin; ++k) {
// Find the maximum of H2[j].
- int peak = 0;
+ size_t peak = 0;
for (size_t j = 0; j < H2.size(); ++j) {
if (H2[j][k] > H2[peak][k]) {
peak = j;
}
}
-
- // Count the peak as a delay only if the peak is sufficiently larger than
- // the tail.
- if (kMinPeakMargin * H2[kTailPartition][k] < H2[peak][k]) {
- reliable_delays_sum += peak;
- ++num_reliable_delays;
- }
- }
-
- // Return no delay if not sufficient delays have been found.
- if (num_reliable_delays < 21) {
- return rtc::Optional<size_t>();
+ ++delays[peak];
}
- const size_t delay = reliable_delays_sum / num_reliable_delays;
- // Sanity check that the peak is not caused by a false strong DC-component in
- // the filter.
- for (size_t k = 1; k < kUpperBin; ++k) {
- if (H2[delay][k] > H2[delay][0]) {
- RTC_DCHECK_GT(H2.size(), delay);
- return rtc::Optional<size_t>(delay);
- }
- }
- return rtc::Optional<size_t>();
+ return std::distance(delays.begin(),
+ std::max_element(delays.begin(), delays.end()));
}
-constexpr int kEchoPathChangeCounterInitial = kNumBlocksPerSecond / 5;
-constexpr int kEchoPathChangeCounterMax = 2 * kNumBlocksPerSecond;
-
} // namespace
int AecState::instance_count_ = 0;
@@ -81,7 +56,6 @@ AecState::AecState(const AudioProcessing::Config::EchoCanceller3& config)
erle_estimator_(config.param.erle.min,
config.param.erle.max_l,
config.param.erle.max_h),
- echo_path_change_counter_(kEchoPathChangeCounterInitial),
config_(config),
reverb_decay_(config_.param.ep_strength.default_len) {}
@@ -102,10 +76,10 @@ void AecState::HandleEchoPathChange(
blocks_with_filter_adaptation_ = 0;
render_received_ = false;
force_zero_gain_ = true;
- echo_path_change_counter_ = kEchoPathChangeCounterMax;
+ capture_block_counter_ = 0;
}
if (echo_path_variability.gain_change) {
- echo_path_change_counter_ = kEchoPathChangeCounterInitial;
+ capture_block_counter_ = kNumBlocksPerSecond;
}
}
}
@@ -114,6 +88,7 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
adaptive_filter_frequency_response,
const std::array<float, kAdaptiveFilterTimeDomainLength>&
adaptive_filter_impulse_response,
+ bool converged_filter,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
@@ -121,77 +96,79 @@ void AecState::Update(const std::vector<std::array<float, kFftLengthBy2Plus1>>&
rtc::ArrayView<const float> x,
const std::array<float, kBlockSize>& s,
bool echo_leakage_detected) {
- // Update the echo audibility evaluator.
- echo_audibility_.Update(x, s);
-
// Store input parameters.
echo_leakage_detected_ = echo_leakage_detected;
// Update counters.
- const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
-
- const bool active_render_block =
- x_energy > (config_.param.render_levels.active_render_limit *
- config_.param.render_levels.active_render_limit) *
- kFftLengthBy2;
- if (active_render_block) {
- render_received_ = true;
- }
- blocks_with_filter_adaptation_ +=
- (active_render_block && (!SaturatedCapture()) ? 1 : 0);
- --echo_path_change_counter_;
+ ++capture_block_counter_;
// Force zero echo suppression gain after an echo path change to allow at
// least some render data to be collected in order to avoid an initial echo
// burst.
- constexpr size_t kZeroGainBlocksAfterChange = kNumBlocksPerSecond / 5;
- force_zero_gain_ = (++force_zero_gain_counter_) < kZeroGainBlocksAfterChange;
+ force_zero_gain_ = (++force_zero_gain_counter_) < kNumBlocksPerSecond / 5;
// Estimate delays.
- filter_delay_ = EstimateFilterDelay(adaptive_filter_frequency_response);
+ filter_delay_ = rtc::Optional<size_t>(
+ EstimateFilterDelay(adaptive_filter_frequency_response));
external_delay_ =
external_delay_samples
? rtc::Optional<size_t>(*external_delay_samples / kBlockSize)
: rtc::Optional<size_t>();
// Update the ERL and ERLE measures.
- if (filter_delay_ && echo_path_change_counter_ <= 0) {
+ if (converged_filter && capture_block_counter_ >= 2 * kNumBlocksPerSecond) {
const auto& X2 = render_buffer.Spectrum(*filter_delay_);
erle_estimator_.Update(X2, Y2, E2_main);
erl_estimator_.Update(X2, Y2);
}
+ // Update the echo audibility evaluator.
+ echo_audibility_.Update(x, s, converged_filter);
+
// Detect and flag echo saturation.
// TODO(peah): Add the delay in this computation to ensure that the render and
// capture signals are properly aligned.
RTC_DCHECK_LT(0, x.size());
const float max_sample = fabs(*std::max_element(
x.begin(), x.end(), [](float a, float b) { return a * a < b * b; }));
- const bool saturated_echo =
- previous_max_sample_ * 100 > 1600 && SaturatedCapture();
- previous_max_sample_ = max_sample;
- // Counts the blocks since saturation.
- constexpr size_t kSaturationLeakageBlocks = 20;
- blocks_since_last_saturation_ =
- saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
- echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+ if (config_.param.ep_strength.echo_can_saturate) {
+ const bool saturated_echo =
+ (previous_max_sample_ > 200.f) && SaturatedCapture();
+
+ // Counts the blocks since saturation.
+ constexpr size_t kSaturationLeakageBlocks = 20;
+ blocks_since_last_saturation_ =
+ saturated_echo ? 0 : blocks_since_last_saturation_ + 1;
+
+ echo_saturation_ = blocks_since_last_saturation_ < kSaturationLeakageBlocks;
+ } else {
+ echo_saturation_ = false;
+ }
+ previous_max_sample_ = max_sample;
// Flag whether the linear filter estimate is usable.
- constexpr size_t kEchoPathChangeConvergenceBlocks = 2 * kNumBlocksPerSecond;
usable_linear_estimate_ =
- (!echo_saturation_) &&
- (!render_received_ ||
- blocks_with_filter_adaptation_ > kEchoPathChangeConvergenceBlocks) &&
- filter_delay_ && echo_path_change_counter_ <= 0 && external_delay_;
+ (!echo_saturation_) && (converged_filter || SufficientFilterUpdates()) &&
+ capture_block_counter_ >= 2 * kNumBlocksPerSecond && external_delay_;
// After an amount of active render samples for which an echo should have been
// detected in the capture signal if the ERL was not infinite, flag that a
- // headset is used.
- constexpr size_t kHeadSetDetectionBlocks = 5 * kNumBlocksPerSecond;
- headset_detected_ = !external_delay_ && !filter_delay_ &&
+ // transparent mode should be entered.
+ const float x_energy = std::inner_product(x.begin(), x.end(), x.begin(), 0.f);
+ const bool active_render_block =
+ x_energy > (config_.param.render_levels.active_render_limit *
+ config_.param.render_levels.active_render_limit) *
+ kFftLengthBy2;
+ if (active_render_block) {
+ render_received_ = true;
+ }
+ blocks_with_filter_adaptation_ +=
+ (active_render_block && (!SaturatedCapture()) ? 1 : 0);
+
+ transparent_mode_ = !converged_filter &&
(!render_received_ || blocks_with_filter_adaptation_ >=
- kHeadSetDetectionBlocks);
+ 5 * kNumBlocksPerSecond);
// Update the room reverb estimate.
UpdateReverb(adaptive_filter_impulse_response);
@@ -276,7 +253,8 @@ void AecState::UpdateReverb(
}
void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
- const std::array<float, kBlockSize>& s) {
+ const std::array<float, kBlockSize>& s,
+ bool converged_filter) {
auto result_x = std::minmax_element(x.begin(), x.end());
auto result_s = std::minmax_element(s.begin(), s.end());
const float x_abs =
@@ -284,10 +262,18 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
const float s_abs =
std::max(std::abs(*result_s.first), std::abs(*result_s.second));
- if (x_abs < 5.f) {
- ++low_farend_counter_;
+ if (converged_filter) {
+ if (x_abs < 20.f) {
+ ++low_farend_counter_;
+ } else {
+ low_farend_counter_ = 0;
+ }
} else {
- low_farend_counter_ = 0;
+ if (x_abs < 100.f) {
+ ++low_farend_counter_;
+ } else {
+ low_farend_counter_ = 0;
+ }
}
// The echo is deemed as not audible if the echo estimate is on the level of
@@ -296,7 +282,8 @@ void AecState::EchoAudibility::Update(rtc::ArrayView<const float> x,
// any residual echo that is below the quantization noise level. Furthermore,
// cases where the render signal is very close to zero are also identified as
// not producing audible echo.
- inaudible_echo_ = max_nearend_ > 500 && s_abs < 30.f;
+ inaudible_echo_ = (max_nearend_ > 500 && s_abs < 30.f) ||
+ (!converged_filter && x_abs < 500);
inaudible_echo_ = inaudible_echo_ || low_farend_counter_ > 20;
}
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h
index 53899e55d14..358c74d8e0b 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state.h
@@ -72,8 +72,8 @@ class AecState {
capture_signal_saturation_ = capture_signal_saturation;
}
- // Returns whether a probable headset setup has been detected.
- bool HeadsetDetected() const { return headset_detected_; }
+ // Returns whether the transparent mode is active.
+ bool TransparentMode() const { return transparent_mode_; }
// Takes appropriate action at an echo path change.
void HandleEchoPathChange(const EchoPathVariability& echo_path_variability);
@@ -92,10 +92,20 @@ class AecState {
echo_audibility_.UpdateWithOutput(e);
}
+ // Returns whether the linear filter should have been able to adapt properly.
+ bool SufficientFilterUpdates() const {
+ return blocks_with_filter_adaptation_ >= kEchoPathChangeConvergenceBlocks;
+ }
+
// Returns whether the echo subtractor can be used to determine the residual
// echo.
bool LinearEchoEstimate() const {
- return UsableLinearEstimate() && !HeadsetDetected();
+ return UsableLinearEstimate() && !TransparentMode();
+ }
+
+ // Returns whether the AEC is in an initial state.
+ bool InitialState() const {
+ return capture_block_counter_ < 3 * kNumBlocksPerSecond;
}
// Updates the aec state.
@@ -103,6 +113,7 @@ class AecState {
adaptive_filter_frequency_response,
const std::array<float, kAdaptiveFilterTimeDomainLength>&
adaptive_filter_impulse_response,
+ bool converged_filter,
const rtc::Optional<size_t>& external_delay_samples,
const RenderBuffer& render_buffer,
const std::array<float, kFftLengthBy2Plus1>& E2_main,
@@ -115,7 +126,8 @@ class AecState {
class EchoAudibility {
public:
void Update(rtc::ArrayView<const float> x,
- const std::array<float, kBlockSize>& s);
+ const std::array<float, kBlockSize>& s,
+ bool converged_filter);
void UpdateWithOutput(rtc::ArrayView<const float> e);
bool InaudibleEcho() const { return inaudible_echo_; }
@@ -133,13 +145,13 @@ class AecState {
std::unique_ptr<ApmDataDumper> data_dumper_;
ErlEstimator erl_estimator_;
ErleEstimator erle_estimator_;
- int echo_path_change_counter_;
+ size_t capture_block_counter_ = 0;
size_t blocks_with_filter_adaptation_ = 0;
bool usable_linear_estimate_ = false;
bool echo_leakage_detected_ = false;
bool capture_signal_saturation_ = false;
bool echo_saturation_ = false;
- bool headset_detected_ = false;
+ bool transparent_mode_ = false;
float previous_max_sample_ = 0.f;
bool force_zero_gain_ = false;
bool render_received_ = false;
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
index 8413413ce2f..4956456a8c0 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/aec_state_unittest.cc
@@ -43,7 +43,7 @@ TEST(AecState, NormalUsage) {
// Verify that linear AEC usability is false when the filter is diverged and
// there is no external delay reported.
- state.Update(diverged_filter_frequency_response, impulse_response,
+ state.Update(diverged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.UsableLinearEstimate());
@@ -51,7 +51,7 @@ TEST(AecState, NormalUsage) {
// Verify that linear AEC usability is true when the filter is converged
std::fill(x[0].begin(), x[0].end(), 101.f);
for (int k = 0; k < 3000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -60,7 +60,7 @@ TEST(AecState, NormalUsage) {
// Verify that linear AEC usability becomes false after an echo path change is
// reported
state.HandleEchoPathChange(EchoPathVariability(true, false));
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.UsableLinearEstimate());
@@ -68,25 +68,25 @@ TEST(AecState, NormalUsage) {
// Verify that the active render detection works as intended.
std::fill(x[0].begin(), x[0].end(), 101.f);
state.HandleEchoPathChange(EchoPathVariability(true, true));
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.ActiveRender());
for (int k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
EXPECT_TRUE(state.ActiveRender());
// Verify that echo leakage is properly reported.
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
EXPECT_FALSE(state.EchoLeakageDetected());
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
true);
EXPECT_TRUE(state.EchoLeakageDetected());
@@ -103,7 +103,7 @@ TEST(AecState, NormalUsage) {
Y2.fill(10.f * 10000.f * 10000.f);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -120,7 +120,7 @@ TEST(AecState, NormalUsage) {
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(10.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -141,7 +141,7 @@ TEST(AecState, NormalUsage) {
E2_main.fill(1.f * 10000.f * 10000.f);
Y2.fill(5.f * E2_main[0]);
for (size_t k = 0; k < 1000; ++k) {
- state.Update(converged_filter_frequency_response, impulse_response,
+ state.Update(converged_filter_frequency_response, impulse_response, true,
rtc::Optional<size_t>(2), render_buffer, E2_main, Y2, x[0], s,
false);
}
@@ -161,34 +161,6 @@ TEST(AecState, NormalUsage) {
}
}
-// Verifies the a non-significant delay is correctly identified.
-TEST(AecState, NonSignificantDelay) {
- AecState state(AudioProcessing::Config::EchoCanceller3{});
- RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
- std::vector<size_t>(1, 30));
- std::array<float, kFftLengthBy2Plus1> E2_main;
- std::array<float, kFftLengthBy2Plus1> Y2;
- std::array<float, kBlockSize> x;
- EchoPathVariability echo_path_variability(false, false);
- std::array<float, kBlockSize> s;
- s.fill(100.f);
- x.fill(0.f);
-
- std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
- for (auto& v : frequency_response) {
- v.fill(0.01f);
- }
-
- std::array<float, kAdaptiveFilterTimeDomainLength> impulse_response;
- impulse_response.fill(0.f);
-
- // Verify that a non-significant filter delay is identified correctly.
- state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
- EXPECT_FALSE(state.FilterDelay());
-}
-
// Verifies the delay for a converged filter is correctly identified.
TEST(AecState, ConvergedFilterDelay) {
constexpr int kFilterLength = 10;
@@ -217,8 +189,9 @@ TEST(AecState, ConvergedFilterDelay) {
frequency_response[k].fill(100.f);
frequency_response[k][0] = 0.f;
state.HandleEchoPathChange(echo_path_variability);
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
+ state.Update(frequency_response, impulse_response, true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+ false);
EXPECT_TRUE(k == (kFilterLength - 1) || state.FilterDelay());
if (k != (kFilterLength - 1)) {
EXPECT_EQ(k, state.FilterDelay());
@@ -241,7 +214,8 @@ TEST(AecState, ExternalDelay) {
x.fill(0.f);
RenderBuffer render_buffer(Aec3Optimization::kNone, 3, 30,
std::vector<size_t>(1, 30));
- std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(30);
+ std::vector<std::array<float, kFftLengthBy2Plus1>> frequency_response(
+ kAdaptiveFilterLength);
for (auto& v : frequency_response) {
v.fill(0.01f);
}
@@ -251,7 +225,7 @@ TEST(AecState, ExternalDelay) {
for (size_t k = 0; k < frequency_response.size() - 1; ++k) {
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, impulse_response,
+ state.Update(frequency_response, impulse_response, true,
rtc::Optional<size_t>(k * kBlockSize + 5), render_buffer,
E2_main, Y2, x, s, false);
EXPECT_TRUE(state.ExternalDelay());
@@ -261,8 +235,9 @@ TEST(AecState, ExternalDelay) {
// Verify that the externally reported delay is properly unset when it is no
// longer present.
state.HandleEchoPathChange(EchoPathVariability(false, false));
- state.Update(frequency_response, impulse_response, rtc::Optional<size_t>(),
- render_buffer, E2_main, Y2, x, s, false);
+ state.Update(frequency_response, impulse_response, true,
+ rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x, s,
+ false);
EXPECT_FALSE(state.ExternalDelay());
}
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc
index cb7e05bbce2..341c89a6e46 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/echo_remover.cc
@@ -172,11 +172,12 @@ void EchoRemoverImpl::ProcessCapture(
// Update the AEC state information.
aec_state_.Update(subtractor_.FilterFrequencyResponse(),
subtractor_.FilterImpulseResponse(),
- echo_path_delay_samples, render_buffer, E2_main, Y2, x0,
- subtractor_output.s_main, echo_leakage_detected_);
+ subtractor_.ConvergedFilter(), echo_path_delay_samples,
+ render_buffer, E2_main, Y2, x0, subtractor_output.s_main,
+ echo_leakage_detected_);
// Choose the linear output.
- output_selector_.FormLinearOutput(!aec_state_.HeadsetDetected(), e_main, y0);
+ output_selector_.FormLinearOutput(!aec_state_.TransparentMode(), e_main, y0);
data_dumper_->DumpWav("aec3_output_linear", kBlockSize, &y0[0],
LowestBandRate(sample_rate_hz_), 1);
data_dumper_->DumpRaw("aec3_output_linear", y0);
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
index 6d0423faaf2..e3f968ce771 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/main_filter_update_gain_unittest.cc
@@ -135,7 +135,7 @@ void RunFilterUpdateTest(int num_blocks_to_process,
// Update the delay.
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(main_filter.FilterFrequencyResponse(),
- main_filter.FilterImpulseResponse(),
+ main_filter.FilterImpulseResponse(), true,
rtc::Optional<size_t>(), render_buffer, E2_main, Y2, x[0],
s, false);
}
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
index 785b837ddb6..e173aa14d94 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/render_delay_buffer.cc
@@ -106,7 +106,7 @@ RenderDelayBufferImpl::RenderDelayBufferImpl(size_t num_bands)
fft_buffer_(
optimization_,
num_bands,
- std::max(kResidualEchoPowerRenderWindowSize, kAdaptiveFilterLength),
+ std::max(kUnknownDelayRenderWindowSize, kAdaptiveFilterLength),
std::vector<size_t>(1, kAdaptiveFilterLength)),
api_call_jitter_buffer_(num_bands),
zero_block_(num_bands, std::vector<float>(kBlockSize, 0.f)) {
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
index c5b0161e8ca..04251a4cca2 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.cc
@@ -74,9 +74,6 @@ void RenderNoisePower(
}
}
-// Assume a minimum echo path gain of -33 dB for headsets.
-constexpr float kHeadsetEchoPathGain = 0.0005f;
-
} // namespace
ResidualEchoEstimator::ResidualEchoEstimator(
@@ -95,24 +92,29 @@ void ResidualEchoEstimator::Estimate(
std::array<float, kFftLengthBy2Plus1>* R2) {
RTC_DCHECK(R2);
- const rtc::Optional<size_t> delay =
- aec_state.ExternalDelay()
- ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
- : aec_state.ExternalDelay())
- : rtc::Optional<size_t>();
-
// Estimate the power of the stationary noise in the render signal.
RenderNoisePower(render_buffer, &X2_noise_floor_, &X2_noise_floor_counter_);
// Estimate the residual echo power.
-
if (aec_state.LinearEchoEstimate()) {
RTC_DCHECK(aec_state.FilterDelay());
const int filter_delay = *aec_state.FilterDelay();
LinearEstimate(S2_linear, aec_state.Erle(), filter_delay, R2);
AddEchoReverb(S2_linear, aec_state.SaturatedEcho(), filter_delay,
aec_state.ReverbDecay(), R2);
+
+ // If the echo is saturated, estimate the echo power as the maximum echo
+ // power with a leakage factor.
+ if (aec_state.SaturatedEcho()) {
+ R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+ }
} else {
+ const rtc::Optional<size_t> delay =
+ aec_state.ExternalDelay()
+ ? (aec_state.FilterDelay() ? aec_state.FilterDelay()
+ : aec_state.ExternalDelay())
+ : rtc::Optional<size_t>();
+
// Estimate the echo generating signal power.
std::array<float, kFftLengthBy2Plus1> X2;
if (aec_state.ExternalDelay() && aec_state.FilterDelay()) {
@@ -120,14 +122,17 @@ void ResidualEchoEstimator::Estimate(
const int delay_use = static_cast<int>(*delay);
// Computes the spectral power over the blocks surrounding the delay.
- RTC_DCHECK_LT(delay_use, kResidualEchoPowerRenderWindowSize);
+ constexpr int kKnownDelayRenderWindowSize = 5;
+ static_assert(
+ kUnknownDelayRenderWindowSize >= kKnownDelayRenderWindowSize,
+ "Requirement to ensure that the render buffer is overrun");
EchoGeneratingPower(
render_buffer, std::max(0, delay_use - 1),
- std::min(kResidualEchoPowerRenderWindowSize - 1, delay_use + 1), &X2);
+ std::min(kKnownDelayRenderWindowSize - 1, delay_use + 1), &X2);
} else {
// Computes the spectral power over the latest blocks.
- EchoGeneratingPower(render_buffer, 0,
- kResidualEchoPowerRenderWindowSize - 1, &X2);
+ EchoGeneratingPower(render_buffer, 0, kUnknownDelayRenderWindowSize - 1,
+ &X2);
}
// Subtract the stationary noise power to avoid stationary noise causing
@@ -136,23 +141,25 @@ void ResidualEchoEstimator::Estimate(
X2.begin(), X2.end(), X2_noise_floor_.begin(), X2.begin(),
[](float a, float b) { return std::max(0.f, a - 10.f * b); });
- NonLinearEstimate(aec_state.HeadsetDetected(), X2, Y2, R2);
- AddEchoReverb(*R2, aec_state.SaturatedEcho(),
- std::min(static_cast<size_t>(kAdaptiveFilterLength),
- delay.value_or(kAdaptiveFilterLength)),
- aec_state.ReverbDecay(), R2);
+ NonLinearEstimate(
+ aec_state.SufficientFilterUpdates(), aec_state.SaturatedEcho(),
+ config_.param.ep_strength.bounded_erl, aec_state.TransparentMode(),
+ aec_state.InitialState(), X2, Y2, R2);
+
+ if (aec_state.ExternalDelay() && aec_state.FilterDelay() &&
+ aec_state.SaturatedEcho()) {
+ AddEchoReverb(*R2, aec_state.SaturatedEcho(),
+ std::min(static_cast<size_t>(kAdaptiveFilterLength),
+ delay.value_or(kAdaptiveFilterLength)),
+ aec_state.ReverbDecay(), R2);
+ }
}
// If the echo is deemed inaudible, set the residual echo to zero.
- if (aec_state.InaudibleEcho() &&
- (aec_state.ExternalDelay() || aec_state.HeadsetDetected())) {
+ if (aec_state.InaudibleEcho()) {
R2->fill(0.f);
- }
-
- // If the echo is saturated, estimate the echo power as the maximum echo power
- // with a leakage factor.
- if (aec_state.SaturatedEcho()) {
- R2->fill((*std::max_element(R2->begin(), R2->end())) * 100.f);
+ R2_old_.fill(0.f);
+ R2_hold_counter_.fill(0.f);
}
std::copy(R2->begin(), R2->end(), R2_old_.begin());
@@ -183,17 +190,39 @@ void ResidualEchoEstimator::LinearEstimate(
}
void ResidualEchoEstimator::NonLinearEstimate(
- bool headset_detected,
+ bool sufficient_filter_updates,
+ bool saturated_echo,
+ bool bounded_erl,
+ bool transparent_mode,
+ bool initial_state,
const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2) {
- // Choose gains.
- const float echo_path_gain_lf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.lf;
- const float echo_path_gain_mf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.mf;
- const float echo_path_gain_hf =
- headset_detected ? kHeadsetEchoPathGain : config_.param.ep_strength.hf;
+ float echo_path_gain_lf;
+ float echo_path_gain_mf;
+ float echo_path_gain_hf;
+
+ // Set echo path gains.
+ if (saturated_echo) {
+ // If the echo could be saturated, use a very conservative gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 10000.f;
+ } else if (sufficient_filter_updates && !bounded_erl) {
+ // If the filter should have been able to converge, and no assumption is
+ // possible on the ERL, use a low gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+ } else if ((sufficient_filter_updates && bounded_erl) || transparent_mode) {
+ // If the filter should have been able to converge, and it is known that
+ // the ERL is bounded, use a very low gain.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.001f;
+ } else if (!initial_state) {
+ // If the AEC is no longer in an initial state, assume a weak echo path.
+ echo_path_gain_lf = echo_path_gain_mf = echo_path_gain_hf = 0.01f;
+ } else {
+ // In the initial state, use conservative gains.
+ echo_path_gain_lf = config_.param.ep_strength.lf;
+ echo_path_gain_mf = config_.param.ep_strength.mf;
+ echo_path_gain_hf = config_.param.ep_strength.hf;
+ }
// Compute preliminary residual echo.
std::transform(
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
index d766f123a4f..d2eada3edf8 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator.h
@@ -49,7 +49,11 @@ class ResidualEchoEstimator {
// Estimates the residual echo power based on the estimate of the echo path
// gain.
- void NonLinearEstimate(bool headset_detected,
+ void NonLinearEstimate(bool sufficient_filter_updates,
+ bool saturated_echo,
+ bool bounded_erl,
+ bool transparent_mode,
+ bool initial_state,
const std::array<float, kFftLengthBy2Plus1>& X2,
const std::array<float, kFftLengthBy2Plus1>& Y2,
std::array<float, kFftLengthBy2Plus1>* R2);
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
index 46b726d9967..a44a548e708 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/residual_echo_estimator_unittest.cc
@@ -83,8 +83,8 @@ TEST(ResidualEchoEstimator, BasicTest) {
render_buffer.Insert(x);
aec_state.HandleEchoPathChange(echo_path_variability);
- aec_state.Update(H2, h, rtc::Optional<size_t>(2), render_buffer, E2_main,
- Y2, x[0], s, false);
+ aec_state.Update(H2, h, true, rtc::Optional<size_t>(2), render_buffer,
+ E2_main, Y2, x[0], s, false);
estimator.Estimate(aec_state, render_buffer, S2_linear, Y2, &R2);
}
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc
index c64e5a4ef9b..c1909f3b90b 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.cc
@@ -11,6 +11,7 @@
#include "modules/audio_processing/aec3/subtractor.h"
#include <algorithm>
+#include <numeric>
#include "api/array_view.h"
#include "modules/audio_processing/logging/apm_data_dumper.h"
@@ -63,6 +64,7 @@ void Subtractor::HandleEchoPathChange(
shadow_filter_.HandleEchoPathChange();
G_main_.HandleEchoPathChange();
G_shadow_.HandleEchoPathChange();
+ converged_filter_ = false;
}
}
@@ -89,6 +91,19 @@ void Subtractor::Process(const RenderBuffer& render_buffer,
shadow_filter_.Filter(render_buffer, &S);
PredictionError(fft_, S, y, &e_shadow, &E_shadow, nullptr);
+ if (!converged_filter_) {
+ const auto sum_of_squares = [](float a, float b) { return a + b * b; };
+ const float e2_main =
+ std::accumulate(e_main.begin(), e_main.end(), 0.f, sum_of_squares);
+ const float e2_shadow =
+ std::accumulate(e_shadow.begin(), e_shadow.end(), 0.f, sum_of_squares);
+ const float y2 = std::accumulate(y.begin(), y.end(), 0.f, sum_of_squares);
+
+ if (y2 > kBlockSize * 50.f * 50.f) {
+ converged_filter_ = (e2_main > 0.3 * y2 || e2_shadow > 0.1 * y2);
+ }
+ }
+
// Compute spectra for future use.
E_main.Spectrum(optimization_, &output->E2_main);
E_shadow.Spectrum(optimization_, &output->E2_shadow);
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h
index e76155494c0..680bf458066 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor.h
@@ -57,6 +57,8 @@ class Subtractor {
return main_filter_.FilterImpulseResponse();
}
+ bool ConvergedFilter() const { return converged_filter_; }
+
private:
const Aec3Fft fft_;
ApmDataDumper* data_dumper_;
@@ -65,6 +67,7 @@ class Subtractor {
AdaptiveFirFilter shadow_filter_;
MainFilterUpdateGain G_main_;
ShadowFilterUpdateGain G_shadow_;
+ bool converged_filter_ = false;
RTC_DISALLOW_IMPLICIT_CONSTRUCTORS(Subtractor);
};
diff --git a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
index ea28c4e6638..0450b6ccd49 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
+++ b/chromium/third_party/webrtc/modules/audio_processing/aec3/subtractor_unittest.cc
@@ -69,6 +69,7 @@ float RunSubtractorTest(int num_blocks_to_process,
aec_state.HandleEchoPathChange(EchoPathVariability(false, false));
aec_state.Update(subtractor.FilterFrequencyResponse(),
subtractor.FilterImpulseResponse(),
+ subtractor.ConvergedFilter(),
rtc::Optional<size_t>(delay_samples / kBlockSize),
render_buffer, E2_main, Y2, x[0], output.s_main, false);
}
diff --git a/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h b/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
index 54aed341b63..6dd0e140aa3 100644
--- a/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
+++ b/chromium/third_party/webrtc/modules/audio_processing/include/audio_processing.h
@@ -284,9 +284,11 @@ class AudioProcessing : public rtc::RefCountInterface {
struct EpStrength {
float lf = 10.f;
- float mf = 100.f;
- float hf = 200.f;
+ float mf = 10.f;
+ float hf = 10.f;
float default_len = 0.f;
+ bool echo_can_saturate = true;
+ bool bounded_erl = false;
} ep_strength;
struct Mask {
@@ -304,7 +306,6 @@ class AudioProcessing : public rtc::RefCountInterface {
struct EchoAudibility {
float low_render_limit = 4 * 64.f;
float normal_render_limit = 64.f;
- float active_render_limit = 100.f;
} echo_audibility;
struct RenderLevels {