From 3d2ee957fa64f4f8172c24094a4d5150d1adb85e Mon Sep 17 00:00:00 2001 From: Jan Buethe Date: Tue, 25 Oct 2022 12:16:39 +0000 Subject: finished encoder implementation --- silk/dred_coding.c | 196 ++++++++++++++++++++++++++++++++++++++++++++++++++++ silk/dred_encoder.c | 100 +++++++++++++++++++++++++-- silk/dred_encoder.h | 7 +- 3 files changed, 298 insertions(+), 5 deletions(-) create mode 100644 silk/dred_coding.c (limited to 'silk') diff --git a/silk/dred_coding.c b/silk/dred_coding.c new file mode 100644 index 00000000..90a1d98c --- /dev/null +++ b/silk/dred_coding.c @@ -0,0 +1,196 @@ +/* Copyright (c) 2022 Amazon + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "celt/vq.h" +#include "celt/cwrs.h" +#include "celt/laplace.h" + +#define LATENT_DIM 80 +#define PVQ_DIM 24 +#define PVQ_K 82 + +static void encode_pvq(const int *iy, int N, int K, ec_enc *enc) { + int fits; + celt_assert(N==24 || N==12 || N==6); + fits = (N==24 && K<=9) || (N==12 && K<=16) || (N==6); + /*printf("encode(%d,%d), fits=%d\n", N, K, fits);*/ + if (fits) encode_pulses(iy, N, K, enc); + else { + int N2 = N/2; + int K0=0; + int i; + for (i=0;i + +#define DATA_SIZE 10000 + +int main() +{ + ec_enc enc; + ec_dec dec; + int iter; + int bytes; + opus_int16 scale[LATENT_DIM]; + opus_int16 dzone[LATENT_DIM]; + opus_int16 r[LATENT_DIM]; + opus_int16 p0[LATENT_DIM]; + unsigned char *ptr; + int k; + + for (k=0;k +#include +#include + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + #include "dred_encoder.h" +#include "dred_coding.h" void init_dred_encoder(DREDEnc* enc) @@ -12,19 +20,103 @@ void init_dred_encoder(DREDEnc* enc) void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame) { + opus_int16 *dead_zone = DRED_rdovae_get_dead_zone_pointer(); + opus_int16 *p0 = DRED_rdovae_get_p0_pointer(); + opus_int16 *quant_scales = DRED_rdovae_get_quant_scales_pointer(); + opus_int16 *r = DRED_rdovae_get_r_pointer(); + + float input_buffer[2*DRED_NUM_FEATURES] = {0}; + + int bytes; + int q_level; + int i; + int offset; + /* delay signal by 79 samples */ - memmove(enc->input_buffer, enc->input_buffer + DRED_SILK_ENCODER_DELAY, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer)); + memmove(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer)); memcpy(enc->input_buffer + DRED_SILK_ENCODER_DELAY, silk_frame, DRED_DFRAME_SIZE * sizeof(*silk_frame)); /* shift latents buffer */ - memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, DRED_LATENT_DIM * sizeof(*enc->latents_buffer)); + memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer)); /* calculate LPCNet features */ lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer); - lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + DRED_NUM_FEATURES); + lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36); + + /* prepare input buffer (discard LPC coefficients) */ + memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0])); + memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0])); /* run RDOVAE encoder */ - DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, enc->feature_buffer); + DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer); /* entropy coding of state and latents */ + ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE); + dred_encode_state(&enc->ec_encoder, enc->state_buffer); + + for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2) + { + q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); + offset = q_level * DRED_LATENT_DIM; + + dred_encode_latents( + &enc->ec_encoder, + enc->latents_buffer + i * DRED_LATENT_DIM, + quant_scales + offset, + dead_zone + offset, + r + offset, + p0 + offset + ); + } + + bytes = (ec_tell(&enc->ec_encoder)+7)/8; + ec_enc_shrink(&enc->ec_encoder, bytes); + ec_enc_done(&enc->ec_encoder); + +#if 1 + printf("packet size: %d\n", bytes*8); +#endif + + +#if 0 + /* trial decoding */ + float state[24]; + float features[4 * 20]; + float latents[80]; + float zeros[36 - 20] = {0}; + static FILE *fid; + RDOVAEDec *rdovae_dec = DRED_rdovae_create_decoder(); + + if (fid == NULL) + { + fid = fopen("features_last.f32", "wb"); + } + + /* decode state */ + ec_enc ec_dec; + ec_dec_init(&ec_dec, ec_get_buffer(&enc->ec_encoder), bytes); + dred_decode_state(&ec_dec, state); + + dred_decode_latents( + &ec_dec, + latents, + quant_scales + offset, + r + offset, + p0 + offset + ); + + DRED_rdovae_dec_init_states(rdovae_dec, state); + + DRED_rdovae_decode_qframe(rdovae_dec, features, latents); + + DRED_rdovae_destroy_decoder(rdovae_dec); + + fwrite(features + 40, sizeof(float), 20, fid); + fwrite(zeros, sizeof(float), 16, fid); + fwrite(features + 60, sizeof(float), 20, fid); + fwrite(zeros, sizeof(float), 16, fid); + +#endif + + } \ No newline at end of file diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h index 2c2672f3..f113061c 100644 --- a/silk/dred_encoder.h +++ b/silk/dred_encoder.h @@ -14,12 +14,17 @@ #define DRED_SILK_ENCODER_DELAY 79 #define DRED_FRAME_SIZE 160 #define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE)) +#define DRED_MAX_DATA_SIZE 10000 +#define DRED_ENC_Q0 9 +#define DRED_ENC_Q1 15 +#define DRED_NUM_REDUNDANCY_FRAMES 50 typedef struct { - opus_int16 input_buffer[79 + 2 * 160]; + opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY]; float feature_buffer[2 * 36]; float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM]; float state_buffer[24]; + unsigned char ec_buffer[DRED_MAX_DATA_SIZE]; ec_enc ec_encoder; LPCNetEncState *lpcnet_enc_state; RDOVAEEnc *rdovae_enc; -- cgit v1.2.1