opusenc: Encode exactly the number of samples we got as input and put correct timestamps on them

The first frame has `lookahead` fewer samples, and the last frame might have some padding, or we might have to encode another frame of silence to get all of our input into the encoded data. This is because of a) the lookahead at the beginning of the encoding, which shifts all data by that number of samples, and b) the padding needed to fill the very last frame completely. Ideally we would use LPC to calculate something better than silence for the padding to make the encoding as smooth as possible. With this we get exactly the same number of samples back out of an opusenc ! opusdec pipeline. https://bugzilla.gnome.org/show_bug.cgi?id=757153
author: Sebastian Dröge <sebastian@centricular.com> 2015-10-30 20:57:37 +0200
committer: Sebastian Dröge <sebastian@centricular.com> 2015-11-03 20:35:41 +0200
commit: 4df2ffaad608b632302e04d9be2fdd90ea6e1e31 (patch)
tree: 9314ac3393b48be31b96a024add3f31017780775
parent: 6ffb90e0375de1a31ff2f1bd6dc8a353fce976ff (diff)
download: gstreamer-plugins-base-4df2ffaad608b632302e04d9be2fdd90ea6e1e31.tar.gz
2 files changed, 57 insertions, 12 deletions
diff --git a/ext/opus/gstopusenc.c b/ext/opus/gstopusenc.c
index 62773278b..94c43ddf3 100644
--- a/ext/opus/gstopusenc.c
+++ b/ext/opus/gstopusenc.c
@@ -412,6 +412,7 @@ gst_opus_enc_start (GstAudioEncoder * benc)
 
   GST_DEBUG_OBJECT (enc, "start");
   enc->encoded_samples = 0;
+  enc->consumed_samples = 0;
 
   return TRUE;
 }
@@ -766,6 +767,7 @@ gst_opus_enc_setup (GstOpusEnc * enc)
       lookahead);
 
   /* lookahead is samples, the Opus header wants it in 48kHz samples */
+  enc->lookahead = enc->pending_lookahead = lookahead;
   lookahead = lookahead * 48000 / enc->sample_rate;
 
   gst_opus_header_create_caps (&caps, NULL, lookahead, enc->sample_rate,
@@ -807,6 +809,7 @@ gst_opus_enc_sink_event (GstAudioEncoder * benc, GstEvent * event)
     }
     case GST_EVENT_SEGMENT:
       enc->encoded_samples = 0;
+      enc->consumed_samples = 0;
       break;
 
     default:
@@ -899,13 +902,13 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
   GstClockTime duration;
 
   guint max_payload_size;
-  gint frame_samples;
+  gint frame_samples, input_samples, output_samples;
 
   g_mutex_lock (&enc->property_lock);
 
   bytes = enc->frame_samples * enc->n_channels * 2;
   max_payload_size = enc->max_payload_size;
-  frame_samples = enc->frame_samples;
+  frame_samples = input_samples = enc->frame_samples;
 
   g_mutex_unlock (&enc->property_lock);
 
@@ -915,20 +918,23 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
     bsize = map.size;
 
     if (G_UNLIKELY (bsize % bytes)) {
+      gint64 diff;
+
       GST_DEBUG_OBJECT (enc, "draining; adding silence samples");
+      g_assert (bsize < bytes);
 
       /* If encoding part of a frame, and we have no set stop time on
        * the output segment, we update the segment stop time to reflect
        * the last sample. This will let oggmux set the last page's
        * granpos to tell a decoder the dummy samples should be clipped.
        */
+      input_samples = bsize / (enc->n_channels * 2);
       segment = &GST_AUDIO_ENCODER_OUTPUT_SEGMENT (enc);
       if (!GST_CLOCK_TIME_IS_VALID (segment->stop)) {
-        int input_samples = bsize / (enc->n_channels * 2);
         GST_DEBUG_OBJECT (enc,
             "No stop time and partial frame, updating segment");
         duration =
-            gst_util_uint64_scale (enc->encoded_samples + input_samples,
+            gst_util_uint64_scale_ceil (enc->consumed_samples + input_samples,
             GST_SECOND, enc->sample_rate);
         segment->stop = segment->start + duration;
         GST_DEBUG_OBJECT (enc, "new output segment %" GST_SEGMENT_FORMAT,
@@ -937,6 +943,21 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
             gst_event_new_segment (segment));
       }
 
+      diff =
+          (enc->encoded_samples + frame_samples) - (enc->consumed_samples +
+          input_samples);
+      if (diff >= 0) {
+        GST_DEBUG_OBJECT (enc,
+            "%" G_GINT64_FORMAT " extra samples of padding in this frame",
+            diff);
+        output_samples = frame_samples - diff;
+      } else {
+        GST_DEBUG_OBJECT (enc,
+            "Need to add %" G_GINT64_FORMAT " extra samples in the next frame",
+            -diff);
+        output_samples = frame_samples;
+      }
+
       size = ((bsize / bytes) + 1) * bytes;
       mdata = g_malloc0 (size);
       memcpy (mdata, bdata, bsize);
@@ -944,10 +965,34 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
     } else {
       data = bdata;
       size = bsize;
+
+      /* Adjust for lookahead here */
+      if (enc->pending_lookahead) {
+        if (input_samples > enc->pending_lookahead) {
+          output_samples = input_samples - enc->pending_lookahead;
+          enc->pending_lookahead = 0;
+        } else {
+          enc->pending_lookahead -= input_samples;
+          output_samples = 0;
+        }
+      } else {
+        output_samples = input_samples;
+      }
     }
   } else {
-    GST_DEBUG_OBJECT (enc, "nothing to drain");
-    goto done;
+    if (enc->encoded_samples < enc->consumed_samples) {
+      data = mdata = g_malloc0 (bytes);
+      size = bytes;
+      output_samples = enc->consumed_samples - enc->encoded_samples;
+      input_samples = 0;
+      GST_DEBUG_OBJECT (enc, "draining %d samples", output_samples);
+    } else if (enc->encoded_samples == enc->consumed_samples) {
+      GST_DEBUG_OBJECT (enc, "nothing to drain");
+      goto done;
+    } else {
+      g_assert_not_reached ();
+      goto done;
+    }
   }
 
   g_assert (size == bytes);
@@ -963,9 +1008,6 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
 
   gst_buffer_map (outbuf, &omap, GST_MAP_WRITE);
 
-  GST_DEBUG_OBJECT (enc, "encoding %d samples (%d bytes)",
-      frame_samples, (int) bytes);
-
   outsize =
       opus_multistream_encode (enc->state, (const gint16 *) data,
       frame_samples, omap.data, max_payload_size * enc->n_channels);
@@ -987,10 +1029,12 @@ gst_opus_enc_encode (GstOpusEnc * enc, GstBuffer * buf)
   GST_DEBUG_OBJECT (enc, "Output packet is %u bytes", outsize);
   gst_buffer_set_size (outbuf, outsize);
 
+
   ret =
       gst_audio_encoder_finish_frame (GST_AUDIO_ENCODER (enc), outbuf,
-      frame_samples);
-  enc->encoded_samples += frame_samples;
+      output_samples);
+  enc->encoded_samples += output_samples;
+  enc->consumed_samples += input_samples;
 
 done:
 
diff --git a/ext/opus/gstopusenc.h b/ext/opus/gstopusenc.h
index 43eaac8d2..f447292af 100644
--- a/ext/opus/gstopusenc.h
+++ b/ext/opus/gstopusenc.h
@@ -79,7 +79,8 @@ struct _GstOpusEnc {
   gint                  n_channels;
   gint                  sample_rate;
 
-  guint64               encoded_samples;
+  guint64               encoded_samples, consumed_samples;
+  guint16               lookahead, pending_lookahead;
 
   guint8                channel_mapping_family;
   guint8                encoding_channel_mapping[256];
author	Sebastian Dröge <sebastian@centricular.com>	2015-10-30 20:57:37 +0200
committer	Sebastian Dröge <sebastian@centricular.com>	2015-11-03 20:35:41 +0200
commit	4df2ffaad608b632302e04d9be2fdd90ea6e1e31 (patch)
tree	9314ac3393b48be31b96a024add3f31017780775
parent	6ffb90e0375de1a31ff2f1bd6dc8a353fce976ff (diff)
download	gstreamer-plugins-base-4df2ffaad608b632302e04d9be2fdd90ea6e1e31.tar.gz