chromium/third_party/blink/renderer/modules/mediarecorder/vpx_encoder.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321

// Copyright 2017 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "third_party/blink/renderer/modules/mediarecorder/vpx_encoder.h"

#include <algorithm>

#include "base/system/sys_info.h"
#include "media/base/video_frame.h"
#include "third_party/blink/renderer/platform/instrumentation/tracing/trace_event.h"
#include "third_party/blink/renderer/platform/scheduler/public/post_cross_thread_task.h"
#include "third_party/blink/renderer/platform/scheduler/public/thread.h"
#include "third_party/blink/renderer/platform/wtf/cross_thread_functional.h"
#include "third_party/blink/renderer/platform/wtf/functional.h"
#include "ui/gfx/geometry/size.h"

using media::VideoFrame;
using media::VideoFrameMetadata;

namespace blink {

// Deleter functor for a heap-allocated vpx_codec_ctx_t: destroys the libvpx
// codec instance held by |codec| and then frees the context struct itself.
// A null |codec| is a no-op.
void VpxEncoder::VpxCodecDeleter::operator()(vpx_codec_ctx_t* codec) {
  if (codec) {
    // A failing vpx_codec_destroy() is treated as fatal.
    const vpx_codec_err_t ret = vpx_codec_destroy(codec);
    CHECK_EQ(ret, VPX_CODEC_OK);
    delete codec;
  }
}

// Returns how many threads the encoder is allowed to use: half of the reported
// processors, rounded up, but never more than eight.
static int GetNumberOfThreadsForEncoding() {
  // Encoding must not monopolise the CPU. With 1 or 2 processors the
  // expression below yields a single thread; beyond that, roughly half of the
  // processors are handed to the encoder.
  constexpr int kMaxEncodingThreads = 8;
  const int half_of_processors_rounded_up =
      (base::SysInfo::NumberOfProcessors() + 1) / 2;
  return std::min(kMaxEncodingThreads, half_of_processors_rounded_up);
}

// static
void VpxEncoder::ShutdownEncoder(std::unique_ptr<Thread> encoding_thread,
                                 ScopedVpxCodecCtxPtr encoder) {
  // There is deliberately no explicit teardown here: |encoding_thread| and
  // |encoder| are taken by value, so both are destroyed when this function
  // returns, on whichever sequence runs it (~VpxEncoder posts it to
  // |main_task_runner_|).
  DCHECK(encoding_thread);
}

// Builds an encoder that produces VP9 when |use_vp9| is true and VP8
// otherwise. The callback, bitrate and task runner are forwarded to the
// VideoTrackRecorder::Encoder base class.
VpxEncoder::VpxEncoder(
    bool use_vp9,
    const VideoTrackRecorder::OnEncodedVideoCB& on_encoded_video_callback,
    int32_t bits_per_second,
    scoped_refptr<base::SingleThreadTaskRunner> main_task_runner)
    : VideoTrackRecorder::Encoder(on_encoded_video_callback,
                                  bits_per_second,
                                  std::move(main_task_runner)),
      use_vp9_(use_vp9) {
  // A zero timebase denominator is the sentinel IsInitialized() tests for, so
  // both configurations start out marked as "not configured yet".
  alpha_codec_config_.g_timebase.den = 0;
  codec_config_.g_timebase.den = 0;
  DCHECK(encoding_thread_);
}

VpxEncoder::~VpxEncoder() {
  // Ownership of the encoding thread and of the main codec context is moved
  // into a task bound to ShutdownEncoder(), so that both are destroyed on
  // |main_task_runner_| rather than inside this destructor.
  auto shutdown_task =
      CrossThreadBindOnce(&VpxEncoder::ShutdownEncoder,
                          std::move(encoding_thread_), std::move(encoder_));
  PostCrossThreadTask(*main_task_runner_.get(), FROM_HERE,
                      std::move(shutdown_task));
}

// Always reports alpha support: EncodeOnEncodingTaskRunner() encodes the alpha
// plane of I420A frames with a second codec instance.
bool VpxEncoder::CanEncodeAlphaChannel() {
  return true;
}

// Encodes |frame| (plus its alpha plane, for I420A input) and posts the
// encoded bytes, together with |capture_timestamp| and the keyframe flag, to
// |origin_task_runner_|. Must run on the encoding task runner.
void VpxEncoder::EncodeOnEncodingTaskRunner(scoped_refptr<VideoFrame> frame,
                                            base::TimeTicks capture_timestamp) {
  TRACE_EVENT0("media", "VpxEncoder::EncodeOnEncodingTaskRunner");
  DCHECK(encoding_task_runner_->BelongsToCurrentThread());

  // GpuMemoryBuffer-backed frames are converted before their planes are read.
  if (frame->storage_type() == media::VideoFrame::STORAGE_GPU_MEMORY_BUFFER)
    frame = ConvertToI420ForSoftwareEncoder(frame);

  const gfx::Size frame_size = frame->visible_rect().size();
  base::TimeDelta duration = EstimateFrameDuration(*frame);
  const media::WebmMuxer::VideoParameters video_params(frame);

  // True when |config| was never set up, or was set up for a different size.
  const auto needs_configuring =
      [this, &frame_size](const vpx_codec_enc_cfg_t& config) {
        return !IsInitialized(config) ||
               gfx::Size(config.g_w, config.g_h) != frame_size;
      };

  if (needs_configuring(codec_config_))
    ConfigureEncoderOnEncodingTaskRunner(frame_size, &codec_config_, &encoder_);

  const bool frame_has_alpha = frame->format() == media::PIXEL_FORMAT_I420A;
  if (frame_has_alpha) {
    // Two encoder instances share the frame, so each one gets half of the
    // duration.
    duration = duration / 2;

    if (needs_configuring(alpha_codec_config_)) {
      ConfigureEncoderOnEncodingTaskRunner(frame_size, &alpha_codec_config_,
                                           &alpha_encoder_);

      // The alpha stream carries the A plane as its luma; its U and V planes
      // are constant filler stored back to back in |alpha_dummy_planes_|.
      const auto pixel_format = frame->format();
      u_plane_stride_ = media::VideoFrame::RowBytes(
          VideoFrame::kUPlane, pixel_format, frame_size.width());
      v_plane_stride_ = media::VideoFrame::RowBytes(
          VideoFrame::kVPlane, pixel_format, frame_size.width());
      v_plane_offset_ = media::VideoFrame::PlaneSize(
                            pixel_format, VideoFrame::kUPlane, frame_size)
                            .GetArea();
      const auto v_plane_area =
          media::VideoFrame::PlaneSize(pixel_format, VideoFrame::kVPlane,
                                       frame_size)
              .GetArea();
      alpha_dummy_planes_.resize(
          SafeCast<wtf_size_t>(v_plane_offset_ + v_plane_area));
      // 0x80 is cheaper to encode than 0x00.
      std::fill(alpha_dummy_planes_.begin(), alpha_dummy_planes_.end(), 0x80);
    }
  }

  // The first alpha frame after a run of non-alpha frames must be a keyframe.
  const bool force_keyframe = frame_has_alpha && !last_frame_had_alpha_;
  last_frame_had_alpha_ = frame_has_alpha;

  std::string data;
  bool keyframe = false;
  DoEncode(encoder_.get(), frame_size, frame->data(VideoFrame::kYPlane),
           frame->visible_data(VideoFrame::kYPlane),
           frame->stride(VideoFrame::kYPlane),
           frame->visible_data(VideoFrame::kUPlane),
           frame->stride(VideoFrame::kUPlane),
           frame->visible_data(VideoFrame::kVPlane),
           frame->stride(VideoFrame::kVPlane), duration, force_keyframe, data,
           &keyframe);

  std::string alpha_data;
  if (frame_has_alpha) {
    uint8_t* const dummy_u_plane = alpha_dummy_planes_.data();
    uint8_t* const dummy_v_plane = dummy_u_plane + v_plane_offset_;

    // The alpha stream is forced to a keyframe whenever the colour stream
    // produced one, keeping the two streams in step.
    bool alpha_keyframe = false;
    DoEncode(alpha_encoder_.get(), frame_size, frame->data(VideoFrame::kAPlane),
             frame->visible_data(VideoFrame::kAPlane),
             frame->stride(VideoFrame::kAPlane), dummy_u_plane,
             SafeCast<int>(u_plane_stride_), dummy_v_plane,
             SafeCast<int>(v_plane_stride_), duration, keyframe, alpha_data,
             &alpha_keyframe);
    DCHECK_EQ(keyframe, alpha_keyframe);
  }

  // The encoded bytes now live in |data| / |alpha_data|; drop this function's
  // reference to the frame before posting the result.
  frame = nullptr;

  PostCrossThreadTask(
      *origin_task_runner_.get(), FROM_HERE,
      CrossThreadBindOnce(
          OnFrameEncodeCompleted,
          WTF::Passed(CrossThreadBindRepeating(on_encoded_video_callback_)),
          video_params, std::move(data), std::move(alpha_data),
          capture_timestamp, keyframe));
}

// Encodes one I420 picture with |encoder|. |data| is the buffer handed to
// vpx_img_wrap(); the three plane pointers and strides then override what the
// wrapper derived from it. The first frame packet produced is copied into
// |output_data| and |*keyframe| reports whether that packet is a keyframe. If
// no frame packet is produced, |output_data| is left untouched and |*keyframe|
// is false.
void VpxEncoder::DoEncode(vpx_codec_ctx_t* const encoder,
                          const gfx::Size& frame_size,
                          uint8_t* const data,
                          uint8_t* const y_plane,
                          int y_stride,
                          uint8_t* const u_plane,
                          int u_stride,
                          uint8_t* const v_plane,
                          int v_stride,
                          const base::TimeDelta& duration,
                          bool force_keyframe,
                          std::string& output_data,
                          bool* const keyframe) {
  DCHECK(encoding_task_runner_->BelongsToCurrentThread());

  // Describe the caller's planes to libvpx without copying any pixels.
  vpx_image_t vpx_image;
  vpx_image_t* const result =
      vpx_img_wrap(&vpx_image, VPX_IMG_FMT_I420, frame_size.width(),
                   frame_size.height(), 1 /* align */, data);
  DCHECK_EQ(result, &vpx_image);
  vpx_image.planes[VPX_PLANE_Y] = y_plane;
  vpx_image.stride[VPX_PLANE_Y] = y_stride;
  vpx_image.planes[VPX_PLANE_U] = u_plane;
  vpx_image.stride[VPX_PLANE_U] = u_stride;
  vpx_image.planes[VPX_PLANE_V] = v_plane;
  vpx_image.stride[VPX_PLANE_V] = v_stride;

  const vpx_codec_flags_t encode_flags =
      force_keyframe ? VPX_EFLAG_FORCE_KF : 0;
  const auto duration_in_us =
      static_cast<unsigned long>(duration.InMicroseconds());
  // The presentation timestamp is pinned to zero so that the encoder bases its
  // single-frame bandwidth calculations entirely on |duration|.
  const vpx_codec_err_t ret =
      vpx_codec_encode(encoder, &vpx_image, 0 /* pts */, duration_in_us,
                       encode_flags, VPX_DL_REALTIME);
  DCHECK_EQ(ret, VPX_CODEC_OK)
      << vpx_codec_err_to_string(ret) << ", #" << vpx_codec_error(encoder)
      << " -" << vpx_codec_error_detail(encoder);

  // Walk the output packets and stop at the first compressed frame; packets of
  // any other kind are skipped.
  *keyframe = false;
  vpx_codec_iter_t packet_iter = nullptr;
  for (const vpx_codec_cx_pkt_t* packet =
           vpx_codec_get_cx_data(encoder, &packet_iter);
       packet != nullptr;
       packet = vpx_codec_get_cx_data(encoder, &packet_iter)) {
    if (packet->kind == VPX_CODEC_CX_FRAME_PKT) {
      output_data.assign(static_cast<char*>(packet->data.frame.buf),
                         packet->data.frame.sz);
      *keyframe = (packet->data.frame.flags & VPX_FRAME_IS_KEY) != 0;
      break;
    }
  }
}

// (Re)creates the codec instance behind |*encoder| for frames of |size| and
// fills |*codec_config| with the settings used to create it. If
// |*codec_config| was already initialized, the previous instance is destroyed
// first. Must run on the encoding task runner.
//
// NOTE(review): a vpx_codec_enc_init() failure is only DCHECKed here; callers
// get no error indication.
void VpxEncoder::ConfigureEncoderOnEncodingTaskRunner(
    const gfx::Size& size,
    vpx_codec_enc_cfg_t* codec_config,
    ScopedVpxCodecCtxPtr* encoder) {
  DCHECK(encoding_task_runner_->BelongsToCurrentThread());
  if (IsInitialized(*codec_config)) {
    // TODO(mcasas) VP8 quirk/optimisation: If the new |size| is strictly less-
    // than-or-equal than the old size, in terms of area, the existing encoder
    // instance could be reused after changing |codec_config->{g_w,g_h}|.
    DVLOG(1) << "Destroying/Re-Creating encoder for new frame size: "
             << gfx::Size(codec_config->g_w, codec_config->g_h).ToString()
             << " --> " << size.ToString() << (use_vp9_ ? " vp9" : " vp8");
    encoder->reset();
  }

  const vpx_codec_iface_t* codec_interface =
      use_vp9_ ? vpx_codec_vp9_cx() : vpx_codec_vp8_cx();
  vpx_codec_err_t result = vpx_codec_enc_config_default(
      codec_interface, codec_config, 0 /* reserved */);
  DCHECK_EQ(VPX_CODEC_OK, result);

  // The bitrate scaling below relies on these library defaults.
  DCHECK_EQ(320u, codec_config->g_w);
  DCHECK_EQ(240u, codec_config->g_h);
  DCHECK_EQ(256u, codec_config->rc_target_bitrate);
  // Use the selected bitrate or adjust default bit rate to account for the
  // actual size.  Note: |rc_target_bitrate| units are kbit per second.
  if (bits_per_second_ > 0) {
    codec_config->rc_target_bitrate = bits_per_second_ / 1000;
  } else {
    // Scale the default bitrate by the ratio of the requested area to the
    // default area. The intermediate product is computed in 64 bits: in 32-bit
    // unsigned arithmetic, area * 256 wraps once the area reaches 2^24 pixels
    // (e.g. 4096x4096), which would silently produce a wrong bitrate.
    const uint64_t scaled_bitrate = static_cast<uint64_t>(size.GetArea()) *
                                    codec_config->rc_target_bitrate /
                                    codec_config->g_w / codec_config->g_h;
    codec_config->rc_target_bitrate = static_cast<unsigned int>(scaled_bitrate);
  }
  // Both VP8/VP9 configuration should be Variable BitRate by default.
  DCHECK_EQ(VPX_VBR, codec_config->rc_end_usage);
  if (use_vp9_) {
    // DCHECK that the profile selected by default is I420 (magic number 0).
    DCHECK_EQ(0u, codec_config->g_profile);
  } else {
    // VP8 always produces frames instantaneously.
    DCHECK_EQ(0u, codec_config->g_lag_in_frames);
  }

  DCHECK(size.width());
  DCHECK(size.height());
  codec_config->g_w = size.width();
  codec_config->g_h = size.height();
  codec_config->g_pass = VPX_RC_ONE_PASS;

  // Timebase is the smallest interval used by the stream, can be set to the
  // frame rate or to e.g. microseconds. Setting a non-zero denominator is also
  // what makes IsInitialized() return true for this configuration.
  codec_config->g_timebase.num = 1;
  codec_config->g_timebase.den = base::Time::kMicrosecondsPerSecond;

  // Let the encoder decide where to place the Keyframes, between min and max.
  // In VPX_KF_AUTO mode libvpx will sometimes emit keyframes regardless of min/
  // max distance out of necessity.
  // Note that due to http://crbug.com/440223, it might be necessary to force a
  // key frame after 10,000 frames since decoding fails after 30,000 non-key
  // frames.
  // Forcing a keyframe in regular intervals also allows seeking in the
  // resulting recording with decent performance.
  codec_config->kf_mode = VPX_KF_AUTO;
  codec_config->kf_min_dist = 0;
  codec_config->kf_max_dist = 100;

  codec_config->g_threads = GetNumberOfThreadsForEncoding();

  // Number of frames to consume before producing output. Set for both codecs,
  // so no separate VP9-only assignment is needed.
  codec_config->g_lag_in_frames = 0;

  encoder->reset(new vpx_codec_ctx_t);
  const vpx_codec_err_t ret = vpx_codec_enc_init(
      encoder->get(), codec_interface, codec_config, 0 /* flags */);
  DCHECK_EQ(VPX_CODEC_OK, ret);

  if (use_vp9_) {
    // Values of VP8E_SET_CPUUSED greater than 0 will increase encoder speed at
    // the expense of quality up to a maximum value of 8 for VP9, by tuning the
    // target time spent encoding the frame. Go from 8 to 5 (values for real
    // time encoding) depending on the amount of cores available in the system.
    const int cpu_used =
        std::max(5, 8 - base::SysInfo::NumberOfProcessors() / 2);
    result = vpx_codec_control(encoder->get(), VP8E_SET_CPUUSED, cpu_used);
    DLOG_IF(WARNING, VPX_CODEC_OK != result) << "VP8E_SET_CPUUSED failed";
  }
}

// Reports whether |codec_config| has been through
// ConfigureEncoderOnEncodingTaskRunner(). The constructor zeroes the timebase
// denominator and configuration sets it to a non-zero value, so that field
// serves as the marker.
bool VpxEncoder::IsInitialized(const vpx_codec_enc_cfg_t& codec_config) const {
  DCHECK(encoding_task_runner_->BelongsToCurrentThread());
  const bool has_timebase = codec_config.g_timebase.den != 0;
  return has_timebase;
}

// Returns the duration to assume for |frame|, clamped to [1 ms, 1/8 s], and
// records the frame's timestamp in |last_frame_timestamp_| for the next call.
base::TimeDelta VpxEncoder::EstimateFrameDuration(const VideoFrame& frame) {
  DCHECK(encoding_task_runner_->BelongsToCurrentThread());

  using base::TimeDelta;
  TimeDelta predicted_frame_duration;
  // Prefer the FRAME_DURATION metadata, but only when it is present and
  // strictly positive.
  const bool has_usable_metadata =
      frame.metadata()->GetTimeDelta(VideoFrameMetadata::FRAME_DURATION,
                                     &predicted_frame_duration) &&
      predicted_frame_duration > TimeDelta();
  if (!has_usable_metadata) {
    // No usable duration from the frame's source: fall back to the time
    // elapsed since the previous frame as a prediction for this one.
    // TODO(mcasas): This duration estimation could lead to artifacts if the
    // cadence of the received stream is compromised (e.g. camera freeze, pause,
    // remote packet loss).  Investigate using GetFrameRate() in this case.
    predicted_frame_duration = frame.timestamp() - last_frame_timestamp_;
  }
  last_frame_timestamp_ = frame.timestamp();

  // Keep the prediction inside a safe range of values.
  const TimeDelta kMaxFrameDuration = TimeDelta::FromSecondsD(1.0 / 8);
  const TimeDelta kMinFrameDuration = TimeDelta::FromMilliseconds(1);
  const TimeDelta at_least_minimum =
      std::max(predicted_frame_duration, kMinFrameDuration);
  return std::min(kMaxFrameDuration, at_least_minimum);
}

}  // namespace blink