diff options
author | Jean-Marc Valin <jmvalin@amazon.com> | 2022-12-06 18:01:53 -0500 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@amazon.com> | 2022-12-06 18:01:53 -0500 |
commit | 97fa4393589d00fbc541d7b7fb36c82032ab9e09 (patch) | |
tree | a4cea343563295d84fe4776a62a1319a5837a97b | |
parent | 9e1ee2db214da345720d619ebc82a1b03198d961 (diff) | |
download | opus-97fa4393589d00fbc541d7b7fb36c82032ab9e09.tar.gz |
DRED cleanup, support for variable number of frames
-rw-r--r-- | silk/dred_decoder.c | 10 | ||||
-rw-r--r-- | silk/dred_decoder.h | 1 | ||||
-rw-r--r-- | silk/dred_encoder.c | 38 | ||||
-rw-r--r-- | silk/dred_encoder.h | 4 | ||||
-rw-r--r-- | src/opus_decoder.c | 4 | ||||
-rw-r--r-- | src/opus_encoder.c | 2 |
6 files changed, 34 insertions, 25 deletions
diff --git a/silk/dred_decoder.c b/silk/dred_decoder.c index a8e0abbd..acbe091d 100644 --- a/silk/dred_decoder.c +++ b/silk/dred_decoder.c @@ -53,6 +53,7 @@ void dred_decode_redundancy_package(DREDDec *dec, float *features, const opus_ui const opus_uint16 *quant_scales = DRED_rdovae_get_quant_scales_pointer(); const opus_uint16 *r = DRED_rdovae_get_r_pointer(); + ec_dec ec; int q_level; int i; int offset; @@ -64,17 +65,20 @@ void dred_decode_redundancy_package(DREDDec *dec, float *features, const opus_ui celt_assert(DRED_NUM_REDUNDANCY_FRAMES % 2 == 0); /* decode initial state and initialize RDOVAE decoder */ - ec_dec_init(&dec->ec_dec, (unsigned char*)bytes, num_bytes); - dred_decode_state(&dec->ec_dec, state); + ec_dec_init(&ec, (unsigned char*)bytes, num_bytes); + dred_decode_state(&ec, state); DRED_rdovae_dec_init_states(dec->rdovae_dec, state); /* decode newest to oldest and store oldest to newest */ for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2) { + /* FIXME: Figure out how to avoid missing a last frame that would take up < 8 bits. */ + if (8*num_bytes - ec_tell(&ec) <= 7) + break; q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); offset = q_level * DRED_LATENT_DIM; dred_decode_latents( - &dec->ec_dec, + &ec, latents, quant_scales + offset, r + offset, diff --git a/silk/dred_decoder.h b/silk/dred_decoder.h index 15b5182b..16f6e74e 100644 --- a/silk/dred_decoder.h +++ b/silk/dred_decoder.h @@ -30,7 +30,6 @@ #include "entcode.h" typedef struct { - ec_dec ec_dec; RDOVAEDec *rdovae_dec; } DREDDec; diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c index a77e273e..6ebf313f 100644 --- a/silk/dred_encoder.c +++ b/silk/dred_encoder.c @@ -55,10 +55,11 @@ void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame) const opus_uint16 *p0 = DRED_rdovae_get_p0_pointer(); const opus_uint16 *quant_scales = DRED_rdovae_get_quant_scales_pointer(); const opus_uint16 *r = DRED_rdovae_get_r_pointer(); - + float feature_buffer[2 * 36]; + float input_buffer[2*DRED_NUM_FEATURES] = {0}; + ec_enc ec_encoder; - int bytes; int q_level; int i; int offset; @@ -71,27 +72,28 @@ void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame) memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer)); /* calculate LPCNet features */ - lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer); - lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36); + lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, feature_buffer); + lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, feature_buffer + 36); /* prepare input buffer (discard LPC coefficients) */ - memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0])); - memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0])); + memcpy(input_buffer, feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0])); + memcpy(input_buffer + DRED_NUM_FEATURES, feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0])); /* run RDOVAE encoder */ DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer); + enc->latents_buffer_fill = IMIN(enc->latents_buffer_fill+1, DRED_NUM_REDUNDANCY_FRAMES); /* entropy coding of state and latents */ - ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE); - dred_encode_state(&enc->ec_encoder, enc->state_buffer); + ec_enc_init(&ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE); + dred_encode_state(&ec_encoder, enc->state_buffer); - for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2) + for (i = 0; i < enc->latents_buffer_fill-1; i += 2) { - q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); + q_level = (int) floor(0.5f + DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); offset = q_level * DRED_LATENT_DIM; dred_encode_latents( - &enc->ec_encoder, + &ec_encoder, enc->latents_buffer + i * DRED_LATENT_DIM, quant_scales + offset, dead_zone + offset, @@ -100,20 +102,20 @@ void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame) ); } - bytes = (ec_tell(&enc->ec_encoder)+7)/8; - ec_enc_shrink(&enc->ec_encoder, bytes); - ec_enc_done(&enc->ec_encoder); + enc->ec_buffer_fill = (ec_tell(&ec_encoder)+7)/8; + ec_enc_shrink(&ec_encoder, enc->ec_buffer_fill); + ec_enc_done(&ec_encoder); -#if 1 - printf("packet size: %d\n", bytes*8); +#if 0 + printf("packet size: %d\n", enc->ec_buffer_fill*8); static FILE *fbs = NULL; if (fbs == NULL) { fbs = fopen("dred_bitstream.bin", "wb"); } - fwrite(&bytes, sizeof(bytes), 1, fbs); - fwrite(ec_get_buffer(&enc->ec_encoder), 1, bytes, fbs); + fwrite(&enc->ec_buffer_fill, sizeof(enc->ec_buffer_fill), 1, fbs); + fwrite(ec_get_buffer(&ec_encoder), 1, enc->ec_buffer_fill, fbs); #endif #if 0 diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h index e5f67c0e..9bb96b96 100644 --- a/silk/dred_encoder.h +++ b/silk/dred_encoder.h @@ -37,11 +37,11 @@ typedef struct { opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY]; - float feature_buffer[2 * 36]; float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM]; + int latents_buffer_fill; float state_buffer[24]; unsigned char ec_buffer[DRED_MAX_DATA_SIZE]; - ec_enc ec_encoder; + int ec_buffer_fill; LPCNetEncState *lpcnet_enc_state; RDOVAEEnc *rdovae_enc; } DREDEnc; diff --git a/src/opus_decoder.c b/src/opus_decoder.c index 2ca93f44..ac308754 100644 --- a/src/opus_decoder.c +++ b/src/opus_decoder.c @@ -150,6 +150,9 @@ int opus_decoder_init(OpusDecoder *st, opus_int32 Fs, int channels) celt_decoder_ctl(celt_dec, CELT_SET_SIGNALLING(0)); +#ifdef ENABLE_NEURAL_FEC + init_dred_decoder(&((silk_decoder_state*)silk_dec)->sPLC.dred_decoder); +#endif st->prev_mode = 0; st->frame_size = Fs/400; st->arch = opus_select_arch(); @@ -1089,6 +1092,7 @@ int opus_decoder_dred_input(OpusDecoder *st, const unsigned char *data, { silk_decoder_state *silk_dec; silk_dec = (silk_decoder_state*)((char*)st+st->silk_dec_offset); + /*printf("Found: %p of size %d\n", payload, payload_len);*/ dred_decode_redundancy_package(&silk_dec->sPLC.dred_decoder, silk_dec->sPLC.fec_features, payload, payload_len); /* Found something -- do the decoding. */ return 1; diff --git a/src/opus_encoder.c b/src/opus_encoder.c index 474fd04f..4354a249 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -2182,7 +2182,7 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_ #ifdef ENABLE_NEURAL_FEC if (1) { DREDEnc *dred = &((silk_encoder*)silk_enc)->state_Fxx[0].sCmn.dred_encoder; - opus_extension_data extension = {127, 0, dred->ec_buffer, dred->ec_encoder.storage}; + opus_extension_data extension = {127, 0, dred->ec_buffer, dred->ec_buffer_fill}; ret = opus_packet_pad_impl(data, ret, max_data_bytes, !st->use_vbr, &extension, 1); if (ret < 0) { |