summaryrefslogtreecommitdiff
path: root/chromium/content/renderer/media/speech_recognition_audio_sink.h
blob: d1200b2f29dbf8cda407d345bf15bc145f07d8e8 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#ifndef CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_
#define CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_

#include "base/callback.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/shared_memory.h"
#include "base/sync_socket.h"
#include "base/threading/thread_checker.h"
#include "content/common/content_export.h"
#include "content/public/renderer/media_stream_audio_sink.h"
#include "media/audio/audio_parameters.h"
#include "media/base/audio_converter.h"
#include "third_party/WebKit/public/platform/WebMediaStreamTrack.h"

// Forward declarations. This header only names these types through
// references, pointers and scoped_ptr members.
namespace media {
class AudioBus;
class AudioFifo;
}  // namespace media

namespace content {

// SpeechRecognitionAudioSink works as an audio sink to the
// WebRtcLocalAudioTrack. It stores the captured data in a FIFO.
// When the FIFO holds enough data for resampling, the sink converts it,
// passes the resulting buffer to the WebSpeechRecognizer via SharedMemory,
// notifies it via SyncSocket, and then increments |buffer_index_|.
// WebSpeechRecognizer increments the shared buffer index to synchronize.
//
// Threading: as the ThreadChecker members below indicate, some methods are
// expected on the main render thread and others on the capture audio thread.
// The class is non-copyable (see DISALLOW_COPY_AND_ASSIGN at the bottom).
class CONTENT_EXPORT SpeechRecognitionAudioSink
    : NON_EXPORTED_BASE(public media::AudioConverter::InputCallback),
      NON_EXPORTED_BASE(public MediaStreamAudioSink) {
 public:
  // Signature of the notification invoked when the audio track was stopped;
  // takes no arguments and returns nothing.
  typedef base::Callback<void()> OnStoppedCB;

  // Socket ownership is transferred to the class via constructor.
  // NOTE(review): the parameter-to-member mapping below is inferred from the
  // member declarations in this header; confirm against the .cc file.
  //   |track|         - the audio track this sink is connected to.
  //   |params|        - presumably the format used by speech recognition
  //                     (|output_params_|).
  //   |memory|        - handle to the memory shared with the browser process.
  //   |socket|        - used to synchronize audio bus reads/writes.
  //   |on_stopped_cb| - called when the audio track was stopped.
  SpeechRecognitionAudioSink(const blink::WebMediaStreamTrack& track,
                             const media::AudioParameters& params,
                             const base::SharedMemoryHandle memory,
                             scoped_ptr<base::SyncSocket> socket,
                             const OnStoppedCB& on_stopped_cb);

  ~SpeechRecognitionAudioSink() override;

  // Returns whether the provided track is supported.
  // Static, so callers can check a track before constructing a sink for it.
  static bool IsSupportedTrack(const blink::WebMediaStreamTrack& track);

 private:
  // content::MediaStreamAudioSink implementation.

  // Receives ready-state changes of the source.
  // NOTE(review): presumably where |track_stopped_| is set and
  // |on_stopped_cb_| is run; confirm in the .cc file.
  void OnReadyStateChanged(
      blink::WebMediaStreamSource::ReadyState state) override;

  // Receives one bus of captured audio together with its estimated capture
  // time. NOTE(review): presumably the point where capture data is queued
  // into |fifo_|; confirm in the .cc file.
  void OnData(const media::AudioBus& audio_bus,
              base::TimeTicks estimated_capture_time) override;

  // Receives the format of the source audio (see |input_params_|, which can
  // change when this occurs).
  void OnSetFormat(const media::AudioParameters& params) override;

  // media::AudioConverter::InputCallback implementation.
  // NOTE(review): presumably fills |audio_bus| from |fifo_| on behalf of
  // |audio_converter_|; the meaning of the returned double is defined by the
  // AudioConverter interface, which is not visible here.
  double ProvideInput(media::AudioBus* audio_bus,
                      base::TimeDelta buffer_delay) override;

  // Returns the pointer to the audio input buffer mapped in the shared memory.
  media::AudioInputBuffer* GetAudioInputBuffer() const;

  // Number of frames per buffer in FIFO. When the buffer is full we convert and
  // consume it on the |output_bus_|. Size of the buffer depends on the
  // resampler. Example: for 44.1 to 16.0 conversion, it should be 4100 frames.
  int fifo_buffer_size_;

  // Used to DCHECK that some methods are called on the main render thread.
  base::ThreadChecker main_render_thread_checker_;

  // Used to DCHECK that some methods are called on the capture audio thread.
  base::ThreadChecker capture_thread_checker_;

  // The audio track that this audio sink is connected to.
  const blink::WebMediaStreamTrack track_;

  // Shared memory used by audio buses on both browser and renderer processes.
  base::SharedMemory shared_memory_;

  // Socket for synchronization of audio bus reads/writes.
  // Created on the renderer client and passed here. Accessed on capture thread.
  scoped_ptr<base::SyncSocket> socket_;

  // Used as a resampler to deliver appropriate format to speech recognition.
  scoped_ptr<media::AudioConverter> audio_converter_;

  // FIFO is used for queuing audio frames before we resample.
  scoped_ptr<media::AudioFifo> fifo_;

  // Audio bus shared with the browser process via |shared_memory_|.
  scoped_ptr<media::AudioBus> output_bus_;

  // Params of the source audio. Can change when |OnSetFormat()| occurs.
  media::AudioParameters input_params_;

  // Params used by speech recognition. Const: fixed for the sink's lifetime.
  const media::AudioParameters output_params_;

  // Whether the track has been stopped.
  bool track_stopped_;

  // Local counter of audio buffers for synchronization.
  uint32 buffer_index_;

  // Callback for the renderer client. Called when the audio track was stopped.
  const OnStoppedCB on_stopped_cb_;

  DISALLOW_COPY_AND_ASSIGN(SpeechRecognitionAudioSink);
};

}  // namespace content

#endif  // CONTENT_RENDERER_MEDIA_SPEECH_RECOGNITION_AUDIO_SINK_H_