// chromium/media/webrtc/audio_processor.h

// Copyright 2021 The Chromium Authors
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef MEDIA_WEBRTC_AUDIO_PROCESSOR_H_
#define MEDIA_WEBRTC_AUDIO_PROCESSOR_H_

#include <atomic>
#include <memory>
#include <string>

#include "base/callback.h"
#include "base/component_export.h"
#include "base/files/file.h"
#include "base/memory/scoped_refptr.h"
#include "base/sequence_checker.h"
#include "base/thread_annotations.h"
#include "base/time/time.h"
#include "media/base/audio_parameters.h"
#include "media/base/audio_processing.h"
#include "media/base/audio_push_fifo.h"
#include "media/webrtc/audio_delay_stats_reporter.h"
#include "third_party/abseil-cpp/absl/types/optional.h"
#include "third_party/webrtc/modules/audio_processing/include/audio_processing.h"
#include "third_party/webrtc/modules/audio_processing/include/audio_processing_statistics.h"
#include "third_party/webrtc/rtc_base/task_queue.h"

namespace media {
class AudioBus;
class AudioProcessorCaptureBus;
class AudioProcessorCaptureFifo;

// This class applies audio processing effects such as echo cancellation and
// noise suppression to input capture audio (such as a microphone signal).
// Effects are applied based on configuration from AudioProcessingSettings, and
// mainly rely on an instance of the webrtc::AudioProcessing module (APM) owned
// by the AudioProcessor.
//
// The AudioProcessor can handle up to three threads (in practice, sequences):
// - An owning sequence, which performs construction, destruction, diagnostic
//   recordings, and similar signals.
// - A capture thread, which calls ProcessCapturedAudio().
// - A playout thread, which calls OnPlayoutData().
//
// All member functions must be called on the owning sequence unless
// specifically documented otherwise. Functions that may be called on the
// capture thread, the playout thread, or any thread say so explicitly in their
// own comments.
class COMPONENT_EXPORT(MEDIA_WEBRTC) AudioProcessor {
 public:
  // Callback for consuming processed capture audio.
  // |audio_bus| contains the most recent processed capture audio.
  // |audio_capture_time| is the capture timestamp accompanying |audio_bus|.
  // NOTE(review): presumably derived from the |audio_capture_time| passed to
  // ProcessCapturedAudio() -- confirm against the implementation.
  // |new_volume| specifies a new microphone volume from the AGC. The new
  // microphone volume range is [0.0, 1.0], and is only set if the microphone
  // volume should be adjusted.
  // Called on the capture thread.
  using DeliverProcessedAudioCallback =
      base::RepeatingCallback<void(const media::AudioBus& audio_bus,
                                   base::TimeTicks audio_capture_time,
                                   absl::optional<double> new_volume)>;

  // Callback receiving log messages; see |log_callback| in Create().
  using LogCallback = base::RepeatingCallback<void(base::StringPiece)>;

  // Factory for AudioProcessor instances.
  // |deliver_processed_audio_callback| is used to deliver frames of processed
  // capture audio, from ProcessCapturedAudio(), and has to be valid for as long
  // as ProcessCapturedAudio() may be called.
  // |log_callback| is used for logging messages on the owning sequence.
  // |settings| selects which audio processing effects are applied.
  // |input_format| specifies the format of the incoming capture data.
  // |output_format| specifies the output format. If
  // |settings|.NeedWebrtcAudioProcessing() is true, then the output must be in
  // 10 ms chunks: the formats must specify |sample rate|/100 samples per buffer
  // (rounded down). Sample rates which are not divisible by 100 are supported
  // on a best-effort basis, audio quality may suffer.
  static std::unique_ptr<AudioProcessor> Create(
      DeliverProcessedAudioCallback deliver_processed_audio_callback,
      LogCallback log_callback,
      const AudioProcessingSettings& settings,
      const media::AudioParameters& input_format,
      const media::AudioParameters& output_format);

  // See Create() for details on the callbacks and formats.
  // NOTE(review): |webrtc_audio_processing| and |stereo_mirroring| presumably
  // initialize |webrtc_audio_processing_| and |stereo_mirroring_|; the APM
  // pointer appears to be allowed to be null, since the accessors below check
  // for that -- confirm against the implementation.
  AudioProcessor(
      DeliverProcessedAudioCallback deliver_processed_audio_callback,
      LogCallback log_callback,
      const media::AudioParameters& input_format,
      const media::AudioParameters& output_format,
      rtc::scoped_refptr<webrtc::AudioProcessing> webrtc_audio_processing,
      bool stereo_mirroring);

  ~AudioProcessor();

  // Not copyable or copy-assignable.
  AudioProcessor(const AudioProcessor&) = delete;
  AudioProcessor& operator=(const AudioProcessor&) = delete;

  // Processes capture audio and delivers in chunks of <= 10 ms to
  // |deliver_processed_audio_callback_|: Each call to ProcessCapturedAudio()
  // method triggers zero or more calls to |deliver_processed_audio_callback_|,
  // depending on internal FIFO size and content.
  // |audio_source| holds the captured audio to process and
  // |audio_capture_time| is its capture timestamp.
  // |num_preferred_channels| is the highest number of channels that any sink is
  // interested in. This can be different from the number of channels in the
  // output format. A value of -1 means an unknown number. If
  // |settings|.multi_channel_capture_processing is true, the number of channels
  // of the output of the Audio Processing Module (APM) will be equal to the
  // highest observed value of num_preferred_channels as long as it does not
  // exceed the number of channels of the output format.
  // |volume| specifies the current microphone volume, in the range [0.0, 1.0].
  // |key_pressed|: NOTE(review): presumably signals keyboard activity during
  // capture; its exact use is not visible in this header -- confirm.
  // Must be called on the capture thread.
  void ProcessCapturedAudio(const media::AudioBus& audio_source,
                            base::TimeTicks audio_capture_time,
                            int num_preferred_channels,
                            double volume,
                            bool key_pressed);

  // Analyzes playout audio for e.g. echo cancellation.
  // Must be called on the playout thread.
  void OnPlayoutData(const media::AudioBus& audio_bus,
                     int sample_rate,
                     base::TimeDelta audio_delay);

  // Accessor to check if WebRTC audio processing is enabled or not, i.e.
  // whether |webrtc_audio_processing_| is non-null.
  // DCHECKs that it is called on the owning sequence.
  bool has_webrtc_audio_processing() const {
    DCHECK_CALLED_ON_VALID_SEQUENCE(owning_sequence_);
    return !!webrtc_audio_processing_;
  }

  // Instructs the Audio Processing Module (APM) to reduce its complexity when
  // |muted| is true. This mode is triggered when all audio tracks are disabled.
  // The default APM complexity mode is restored by |muted| set to false.
  void SetOutputWillBeMuted(bool muted);

  // Starts a new diagnostic audio recording (aecdump). If an aecdump recording
  // is already ongoing, it is stopped before starting the new one.
  void OnStartDump(base::File dump_file);

  // Stops any ongoing aecdump.
  void OnStopDump();

  // Returns any available statistics from the WebRTC audio processing module.
  // May be called on any thread.
  webrtc::AudioProcessingStats GetStats();

  // Test-only accessor: returns the current APM configuration, or nullopt if
  // there is no APM (|webrtc_audio_processing_| is null). Unlike
  // has_webrtc_audio_processing(), this performs no sequence check.
  absl::optional<webrtc::AudioProcessing::Config>
  GetAudioProcessingModuleConfigForTesting() const {
    if (webrtc_audio_processing_) {
      return webrtc_audio_processing_->GetConfig();
    }
    return absl::nullopt;
  }

  // The format of audio input to and output from the processor; constant
  // throughout AudioProcessor lifetime.
  const media::AudioParameters& input_format() const { return input_format_; }
  const media::AudioParameters& output_format() const { return output_format_; }

  // Returns an input format compatible with the specified audio processing
  // settings and device parameters. Returns nullopt if no compatible format can
  // be produced.
  static absl::optional<AudioParameters> ComputeInputFormat(
      const AudioParameters& device_format,
      const AudioProcessingSettings& settings);

  // Returns an output format that minimizes delay and resampling for given
  // input format and audio processing settings.
  static AudioParameters GetDefaultOutputFormat(
      const AudioParameters& input_format,
      const AudioProcessingSettings& settings);

 private:
  // Gives the test fixture access to private members.
  friend class AudioProcessorTest;

  // Called by ProcessCapturedAudio().
  // Returns the new microphone volume in the range of [0.0, 1.0], or unset if
  // the volume should not be updated.
  // |process_ptrs| and |output_ptrs| are per-channel sample pointers for the
  // input and output respectively, and |process_frames| is the number of
  // frames to process. NOTE(review): inferred from the parameter types and
  // names; confirm buffer-size expectations against the implementation.
  // |num_preferred_channels| is the highest number of channels that any sink is
  // interested in. This can be different from the number of channels in the
  // output format. A value of -1 means an unknown number. If
  // |settings|.multi_channel_capture_processing is true, the number of
  // channels of the output of the Audio Processing Module (APM) will be equal
  // to the highest observed value of num_preferred_channels as long as it does
  // not exceed the number of channels of the output format.
  // Called on the capture thread.
  absl::optional<double> ProcessData(const float* const* process_ptrs,
                                     int process_frames,
                                     base::TimeDelta capture_delay,
                                     double volume,
                                     bool key_pressed,
                                     int num_preferred_channels,
                                     float* const* output_ptrs);

  // Used as callback from |playout_fifo_| in OnPlayoutData().
  // Called on the playout thread.
  void AnalyzePlayoutData(const AudioBus& audio_bus, int frame_delay);

  // Logs |message| using |log_callback_|. Annotated as requiring the owning
  // sequence.
  void SendLogMessage(const std::string& message)
      VALID_CONTEXT_REQUIRED(owning_sequence_);

  // Checker for the owning sequence; referenced by the thread annotations and
  // DCHECKs in this class.
  SEQUENCE_CHECKER(owning_sequence_);

  // The WebRTC audio processing module (APM). Performs the bulk of the audio
  // processing and resampling algorithms.
  const rtc::scoped_refptr<webrtc::AudioProcessing> webrtc_audio_processing_;

  // If true, then the audio processor should swap the left and right channel of
  // captured stereo audio.
  const bool stereo_mirroring_;

  // Members accessed only by the owning sequence:

  // Used by SendLogMessage.
  const LogCallback log_callback_ GUARDED_BY_CONTEXT(owning_sequence_);

  // Low-priority task queue for doing AEC dump recordings. It has to be
  // created/destroyed on the same sequence and it must outlive
  // any aecdump recording in |webrtc_audio_processing_|.
  std::unique_ptr<rtc::TaskQueue> worker_queue_
      GUARDED_BY_CONTEXT(owning_sequence_);

  // Members shared between the playout thread and the capture thread:

  // Cached value for the playout delay latency. Updated on the playout thread
  // and read on the capture thread; atomic because it crosses threads.
  std::atomic<base::TimeDelta> playout_delay_{base::TimeDelta()};

  // Members configured on the owning sequence in the constructor and
  // used on the capture thread:

  // FIFO to provide capture audio in chunks that can be processed by
  // webrtc::AudioProcessing.
  std::unique_ptr<AudioProcessorCaptureFifo> capture_fifo_;

  // Receives APM processing output.
  std::unique_ptr<AudioProcessorCaptureBus> output_bus_;

  // Input and output formats for capture processing.
  const media::AudioParameters input_format_;
  const media::AudioParameters output_format_;

  // Members accessed only on the capture thread:

  // Consumer of processed capture audio in ProcessCapturedAudio().
  const DeliverProcessedAudioCallback deliver_processed_audio_callback_;

  // Observed maximum number of preferred output channels. Used for not
  // performing audio processing on more channels than the sinks are interested
  // in. The value is a maximum over time and can increase but never decrease.
  // If |settings|.multi_channel_capture_processing is true, Audio Processing
  // Module (APM) will output max_num_preferred_output_channels_ channels as
  // long as it does not exceed the number of channels of the output format.
  int max_num_preferred_output_channels_ = 1;

  // For reporting audio delay stats.
  media::AudioDelayStatsReporter audio_delay_stats_reporter_;

  // Members accessed only on the playout thread:

  // FIFO to provide playout audio in chunks that can be processed by
  // webrtc::AudioProcessing.
  AudioPushFifo playout_fifo_;

  // Cached value of the playout delay before adjusting for delay introduced by
  // |playout_fifo_|.
  base::TimeDelta unbuffered_playout_delay_ = base::TimeDelta();

  // The sample rate of incoming playout audio. Unset until known.
  absl::optional<int> playout_sample_rate_hz_ = absl::nullopt;

  // Counters to avoid excessively logging errors on a real-time thread.
  size_t apm_playout_error_code_log_count_ = 0;
  size_t large_delay_log_count_ = 0;
};

}  // namespace media

#endif  // MEDIA_WEBRTC_AUDIO_PROCESSOR_H_