summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSebastian Dröge <sebastian@centricular.com>2015-10-30 20:57:37 +0200
committerSebastian Dröge <sebastian@centricular.com>2015-11-03 20:35:41 +0200
commit4df2ffaad608b632302e04d9be2fdd90ea6e1e31 (patch)
tree9314ac3393b48be31b96a024add3f31017780775
parent6ffb90e0375de1a31ff2f1bd6dc8a353fce976ff (diff)
downloadgstreamer-plugins-base-4df2ffaad608b632302e04d9be2fdd90ea6e1e31.tar.gz
opusenc: Encode exactly the number of samples we got as input and put correct timestamps on the output
The first frame has fewer samples (fewer by the lookahead), and the last frame might have some padding, or we might have to encode another frame of silence to get all our input into the encoded data. This is because of a) the lookahead at the beginning of the encoding, which shifts all data by that number of samples, and b) the padding needed to fill the very last frame completely. Ideally we would use LPC to calculate something better than silence for the padding to make the encoding as smooth as possible. With this change we get exactly the same number of samples out again in an opusenc ! opusdec pipeline. https://bugzilla.gnome.org/show_bug.cgi?id=757153
-rw-r--r--ext/opus/gstopusenc.c66
-rw-r--r--ext/opus/gstopusenc.h3
2 files changed, 57 insertions, 12 deletions
diff --git a/ext/opus/gstopusenc.c b/ext/opus/gstopusenc.c
index 62773278b..94c43ddf3 100644
--- a/ext/opus/gstopusenc.c
+++ b/ext/opus/gstopusenc.c
@@ -412,6 +412,7 @@ gst_opus_enc_start (GstAudioEncoder * benc)
GST_DEBUG_OBJECT (enc, "start");
enc->encoded_samples = 0;
+ enc->consumed_samples = 0;
return TRUE;
}
@@ -766,6 +767,7 @@ gst_opus_enc_setup (GstOpusEnc * enc)
lookahead);
/* lookahead is samples, the Opus header wants it in 48kHz samples */
+ enc->lookahead = enc->pending_lookahead = lookahead;
lookahead = lookahead * 48000 / enc->sample_rate;
gst_opus_header_create_caps (&caps, NULL, lookahead, enc->sample_rate,
@@ -807,6 +809,7 @@ gst_opus_enc_sink_event (GstAudioEncoder * benc, GstEvent * event)
}
case GST_EVENT_SEGMENT:
enc->encoded_samples = 0;
+ enc->consumed_samples = 0;
break;
default:
@@ -899,13 +902,13 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
GstClockTime duration;
guint max_payload_size;
- gint frame_samples;
+ gint frame_samples, input_samples, output_samples;
g_mutex_lock (&enc->property_lock);
bytes = enc->frame_samples * enc->n_channels * 2;
max_payload_size = enc->max_payload_size;
- frame_samples = enc->frame_samples;
+ frame_samples = input_samples = enc->frame_samples;
g_mutex_unlock (&enc->property_lock);
@@ -915,20 +918,23 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
bsize = map.size;
if (G_UNLIKELY (bsize % bytes)) {
+ gint64 diff;
+
GST_DEBUG_OBJECT (enc, "draining; adding silence samples");
+ g_assert (bsize < bytes);
/* If encoding part of a frame, and we have no set stop time on
* the output segment, we update the segment stop time to reflect
* the last sample. This will let oggmux set the last page's
* granpos to tell a decoder the dummy samples should be clipped.
*/
+ input_samples = bsize / (enc->n_channels * 2);
segment = &GST_AUDIO_ENCODER_OUTPUT_SEGMENT (enc);
if (!GST_CLOCK_TIME_IS_VALID (segment->stop)) {
- int input_samples = bsize / (enc->n_channels * 2);
GST_DEBUG_OBJECT (enc,
"No stop time and partial frame, updating segment");
duration =
- gst_util_uint64_scale (enc->encoded_samples + input_samples,
+ gst_util_uint64_scale_ceil (enc->consumed_samples + input_samples,
GST_SECOND, enc->sample_rate);
segment->stop = segment->start + duration;
GST_DEBUG_OBJECT (enc, "new output segment %" GST_SEGMENT_FORMAT,
@@ -937,6 +943,21 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
gst_event_new_segment (segment));
}
+ diff =
+ (enc->encoded_samples + frame_samples) - (enc->consumed_samples +
+ input_samples);
+ if (diff >= 0) {
+ GST_DEBUG_OBJECT (enc,
+ "%" G_GINT64_FORMAT " extra samples of padding in this frame",
+ diff);
+ output_samples = frame_samples - diff;
+ } else {
+ GST_DEBUG_OBJECT (enc,
+ "Need to add %" G_GINT64_FORMAT " extra samples in the next frame",
+ -diff);
+ output_samples = frame_samples;
+ }
+
size = ((bsize / bytes) + 1) * bytes;
mdata = g_malloc0 (size);
memcpy (mdata, bdata, bsize);
@@ -944,10 +965,34 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
} else {
data = bdata;
size = bsize;
+
+ /* Adjust for lookahead here */
+ if (enc->pending_lookahead) {
+ if (input_samples > enc->pending_lookahead) {
+ output_samples = input_samples - enc->pending_lookahead;
+ enc->pending_lookahead = 0;
+ } else {
+ enc->pending_lookahead -= input_samples;
+ output_samples = 0;
+ }
+ } else {
+ output_samples = input_samples;
+ }
}
} else {
- GST_DEBUG_OBJECT (enc, "nothing to drain");
- goto done;
+ if (enc->encoded_samples < enc->consumed_samples) {
+ data = mdata = g_malloc0 (bytes);
+ size = bytes;
+ output_samples = enc->consumed_samples - enc->encoded_samples;
+ input_samples = 0;
+ GST_DEBUG_OBJECT (enc, "draining %d samples", output_samples);
+ } else if (enc->encoded_samples == enc->consumed_samples) {
+ GST_DEBUG_OBJECT (enc, "nothing to drain");
+ goto done;
+ } else {
+ g_assert_not_reached ();
+ goto done;
+ }
}
g_assert (size == bytes);
@@ -963,9 +1008,6 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
- GST_DEBUG_OBJECT (enc, "encoding %d samples (%d bytes)",
- frame_samples, (int) bytes);
-
outsize =
opus_multistream_encode (enc->state, (const gint16 *) data,
frame_samples, omap.data, max_payload_size * enc->n_channels);
@@ -987,10 +1029,12 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
GST_DEBUG_OBJECT (enc, "Output packet is %u bytes", outsize);
gst_buffer_set_size (outbuf, outsize);
+
ret =
gst_audio_encoder_finish_frame (GST_AUDIO_ENCODER (enc), outbuf,
- frame_samples);
- enc->encoded_samples += frame_samples;
+ output_samples);
+ enc->encoded_samples += output_samples;
+ enc->consumed_samples += input_samples;
done:
diff --git a/ext/opus/gstopusenc.h b/ext/opus/gstopusenc.h
index 43eaac8d2..f447292af 100644
--- a/ext/opus/gstopusenc.h
+++ b/ext/opus/gstopusenc.h
@@ -79,7 +79,8 @@ struct _GstOpusEnc {
gint n_channels;
gint sample_rate;
- guint64 encoded_samples;
+ guint64 encoded_samples, consumed_samples;
+ guint16 lookahead, pending_lookahead;
guint8 channel_mapping_family;
guint8 encoding_channel_mapping[256];