summary refs log tree commit diff
diff options
context:
space:
mode:
author Jan Buethe <jbuethe@amazon.de> 2022-10-25 12:16:39 +0000
committer Jan Buethe <jbuethe@amazon.de> 2022-10-25 12:16:39 +0000
commit 3d2ee957fa64f4f8172c24094a4d5150d1adb85e (patch)
tree 415970285c0f8f5e471b08b3df8d9fcf8b51c2c3
parent f79693ed4b1ccbb252c67b1206af723a36af1095 (diff)
download opus-3d2ee957fa64f4f8172c24094a4d5150d1adb85e.tar.gz
finished encoder implementation
-rw-r--r-- silk/dred_coding.c 196
-rw-r--r-- silk/dred_encoder.c 100
-rw-r--r-- silk/dred_encoder.h 7
3 files changed, 298 insertions, 5 deletions
diff --git a/silk/dred_coding.c b/silk/dred_coding.c
new file mode 100644
index 00000000..90a1d98c
--- /dev/null
+++ b/silk/dred_coding.c
@@ -0,0 +1,196 @@
+/* Copyright (c) 2022 Amazon
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+ OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#include <math.h>
+#include <stdio.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "celt/vq.h"
+#include "celt/cwrs.h"
+#include "celt/laplace.h"
+
+#define LATENT_DIM 80
+#define PVQ_DIM 24
+#define PVQ_K 82
+
+/* Range-code the integer pulse vector iy (dimension N = 24, 12 or 6, K pulses in total).
+   Sizes accepted by the test below go straight to encode_pulses(); any other size sends
+   the pulse count of the first half and then codes the two halves recursively. */
+static void encode_pvq(const int *iy, int N, int K, ec_enc *enc) {
+    const int half = N/2;
+    int head_pulses = 0;
+    int n;
+    celt_assert(N==24 || N==12 || N==6);
+    if ((N==6) || (N==12 && K<=16) || (N==24 && K<=9)) {
+        encode_pulses(iy, N, K, enc);
+        return;
+    }
+    for (n=0;n<half;n++) head_pulses += abs(iy[n]);
+    /* FIXME: Don't use uniform probability for the first-half pulse count. */
+    ec_enc_uint(enc, head_pulses, K+1);
+    encode_pvq(iy, half, head_pulses, enc);
+    encode_pvq(iy+half, half, K-head_pulses, enc);
+}
+
+/* PVQ-quantize x (PVQ_DIM values, PVQ_K pulses) and code it into enc; x is handed non-const to the search, which may alter it (TODO confirm). */
+void dred_encode_state(ec_enc *enc, float *x) {
+    int iy[PVQ_DIM];
+    op_pvq_search_c(x, iy, PVQ_K, PVQ_DIM, 0);
+    encode_pvq(iy, PVQ_DIM, PVQ_K, enc);
+}
+
+/* Quantize LATENT_DIM latents from x and code them into enc. Element i is scaled by scale[i]/256,
+   uses a dead-zone width of dzone[i]/1024, and is coded by ec_laplace_encode_p0() with p0[i], r[i]. */
+void dred_encode_latents(ec_enc *enc, const float *x, const opus_int16 *scale, const opus_int16 *dzone, const opus_int16 *r, const opus_int16 *p0) {
+    const float eps = .1f;
+    int i;
+    for (i=0;i<LATENT_DIM;i++) {
+        float delta = dzone[i]*(1.f/1024.f);
+        float xq = x[i]*scale[i]*(1.f/256.f);
+        int q;
+        /* Subtract a tanh-shaped offset of magnitude at most |delta|; eps keeps the divisor away from 0 when delta is 0. */
+        xq = xq - delta*tanh(xq/(delta+eps));
+        q = (int)floor(.5f+xq); /* round to the nearest integer */
+        ec_laplace_encode_p0(enc, q, p0[i], r[i]);
+    }
+}
+
+
+
+/* Counterpart of encode_pvq(): fill iy with an N-dimensional (24, 12 or 6) vector of K
+   pulses, read either directly with decode_pulses() or as a first-half pulse count
+   followed by the two recursively decoded halves. */
+static void decode_pvq(int *iy, int N, int K, ec_dec *dec) {
+    const int half = N/2;
+    int head_pulses;
+    celt_assert(N==24 || N==12 || N==6);
+    if ((N==6) || (N==12 && K<=16) || (N==24 && K<=9)) {
+        decode_pulses(iy, N, K, dec);
+        return;
+    }
+    /* FIXME: Don't use uniform probability for the first-half pulse count. */
+    head_pulses = ec_dec_uint(dec, K+1);
+    decode_pvq(iy, half, head_pulses, dec);
+    decode_pvq(iy+half, half, K-head_pulses, dec);
+}
+
+/* Decode the PVQ-coded state from dec and write it to x[0..PVQ_DIM-1], scaled to unit
+   L2 norm. The coder argument is typed ec_dec*, like decode_pvq() and dred_decode_latents(). */
+void dred_decode_state(ec_dec *dec, float *x) {
+    int k;
+    int iy[PVQ_DIM];
+    float norm = 0;
+    decode_pvq(iy, PVQ_DIM, PVQ_K, dec);
+    /* Sum of squares of the integer pulse vector. */
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        norm += (float) iy[k] * iy[k];
+    }
+    norm = 1 / sqrtf(norm);
+    for (k = 0; k < PVQ_DIM; k++)
+    {
+        x[k] = iy[k] * norm;
+    }
+}
+
+/* Decode LATENT_DIM values from dec with ec_laplace_decode_p0(p0[i], r[i]) and store q*256/scale[i] in x[i]. */
+void dred_decode_latents(ec_dec *dec, float *x, const opus_int16 *scale, const opus_int16 *r, const opus_int16 *p0) {
+    int i;
+    for (i=0;i<LATENT_DIM;i++) {
+        int q = ec_laplace_decode_p0(dec, p0[i], r[i]);
+        /* A zero scale is replaced by 1 so the division is always defined. */
+        x[i] = q*256.f/(scale[i] == 0 ? 1 : scale[i]);
+    }
+}
+
+#if 0
+#include <stdlib.h>
+
+#define DATA_SIZE 10000
+
+/* Disabled self-test: code one random state and one random latent vector, then decode both and print the latents. */
+int main(void)
+{
+    ec_enc enc;
+    ec_dec dec;
+    int iter;
+    int bytes;
+    opus_int16 scale[LATENT_DIM];
+    opus_int16 dzone[LATENT_DIM];
+    opus_int16 r[LATENT_DIM];
+    opus_int16 p0[LATENT_DIM];
+    unsigned char *ptr;
+    int k;
+    for (k=0;k<LATENT_DIM;k++) {
+        scale[k] = 256;
+        dzone[k] = 0;
+        r[k] = 12054;
+        p0[k] = 12893;
+    }
+    ptr = malloc(DATA_SIZE);
+    if (ptr == NULL) return 1; /* no coding buffer, nothing to test */
+    ec_enc_init(&enc,ptr,DATA_SIZE);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        float sum=1e-30;
+        for (k=0;k<PVQ_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+            sum += fabs(x[k]);
+        }
+        for (k=0;k<PVQ_DIM;k++) x[k] *= (1.f/sum);
+        dred_encode_state(&enc, x);
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        for (k=0;k<LATENT_DIM;k++) {
+            x[k] = log(1e-15+(float)rand()/RAND_MAX)-log(1e-15+(float)rand()/RAND_MAX);
+        }
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+        dred_encode_latents(&enc, x, scale, dzone, r, p0);
+    }
+    bytes = (ec_tell(&enc)+7)/8;
+    ec_enc_shrink(&enc, bytes);
+    ec_enc_done(&enc);
+    ec_dec_init(&dec,ec_get_buffer(&enc),bytes);
+    for (iter=0;iter<1;iter++) {
+        float x[PVQ_DIM];
+        dred_decode_state(&dec, x);
+    }
+    for (iter=0;iter<1;iter++) {
+        float x[LATENT_DIM];
+        dred_decode_latents(&dec, x, scale, r, p0);
+        for (k=0;k<LATENT_DIM;k++) printf("%f ", x[k]);
+        printf("\n");
+    }
+    free(ptr); /* release the coding buffer once decoding is finished */
+    return 0;
+}
+#endif \ No newline at end of file
diff --git a/silk/dred_encoder.c b/silk/dred_encoder.c
index c61533bf..100d9365 100644
--- a/silk/dred_encoder.c
+++ b/silk/dred_encoder.c
@@ -1,6 +1,14 @@
#include <string.h>
+#include <stdio.h>
+#include <math.h>
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
#include "dred_encoder.h"
+#include "dred_coding.h"
void init_dred_encoder(DREDEnc* enc)
@@ -12,19 +20,103 @@ void init_dred_encoder(DREDEnc* enc)
/* Per-frame DRED encoder step as shown in this hunk: append silk_frame to the delayed input
   buffer, compute two LPCNet feature frames, run the RDOVAE encoder, then range-code the
   state and every second latent frame into enc->ec_buffer. */
void dred_encode_silk_frame(DREDEnc *enc, const opus_int16 *silk_frame)
{
+ opus_int16 *dead_zone = DRED_rdovae_get_dead_zone_pointer();
+ opus_int16 *p0 = DRED_rdovae_get_p0_pointer();
+ opus_int16 *quant_scales = DRED_rdovae_get_quant_scales_pointer();
+ opus_int16 *r = DRED_rdovae_get_r_pointer();
+
+ float input_buffer[2*DRED_NUM_FEATURES] = {0};
+
+ int bytes;
+ int q_level;
+ int i;
+ int offset;
+
/* delay signal by 79 samples (DRED_SILK_ENCODER_DELAY): the last 79 samples of the previous buffer move to the front, then the new frame is appended after them */
- memmove(enc->input_buffer, enc->input_buffer + DRED_SILK_ENCODER_DELAY, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));
+ memmove(enc->input_buffer, enc->input_buffer + DRED_DFRAME_SIZE, DRED_SILK_ENCODER_DELAY * sizeof(*enc->input_buffer));
memcpy(enc->input_buffer + DRED_SILK_ENCODER_DELAY, silk_frame, DRED_DFRAME_SIZE * sizeof(*silk_frame));
/* shift latents buffer by one DRED_LATENT_DIM slot towards the end, freeing slot 0 */
- memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, DRED_LATENT_DIM * sizeof(*enc->latents_buffer));
+ memmove(enc->latents_buffer + DRED_LATENT_DIM, enc->latents_buffer, (DRED_MAX_FRAMES - 1) * DRED_LATENT_DIM * sizeof(*enc->latents_buffer));
/* calculate LPCNet features for the two DRED_FRAME_SIZE halves of the input buffer */
lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer, enc->feature_buffer);
- lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + DRED_NUM_FEATURES);
+ lpcnet_compute_single_frame_features(enc->lpcnet_enc_state, enc->input_buffer + DRED_FRAME_SIZE, enc->feature_buffer + 36); /* 36 matches the per-frame stride of feature_buffer[2 * 36] */
+
+ /* prepare input buffer (discard LPC coefficients): keep only the first DRED_NUM_FEATURES values of each 36-value frame */
+ memcpy(input_buffer, enc->feature_buffer, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
+ memcpy(input_buffer + DRED_NUM_FEATURES, enc->feature_buffer + 36, DRED_NUM_FEATURES * sizeof(input_buffer[0]));
/* run RDOVAE encoder */
- DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, enc->feature_buffer);
+ DRED_rdovae_encode_dframe(enc->rdovae_enc, enc->latents_buffer, enc->state_buffer, input_buffer);
/* entropy coding of state and latents */
+ ec_enc_init(&enc->ec_encoder, enc->ec_buffer, DRED_MAX_DATA_SIZE);
+ dred_encode_state(&enc->ec_encoder, enc->state_buffer);
+
+ for (i = 0; i < DRED_NUM_REDUNDANCY_FRAMES; i += 2) /* NOTE(review): reads latents up to index DRED_NUM_REDUNDANCY_FRAMES - 2; DRED_MAX_FRAMES is not visible here, confirm it is large enough */
+ {
+ q_level = (int) round(DRED_ENC_Q0 + 1.f * (DRED_ENC_Q1 - DRED_ENC_Q0) * i / (DRED_NUM_REDUNDANCY_FRAMES - 2)); /* linear ramp from DRED_ENC_Q0 at i = 0 to DRED_ENC_Q1 at the last i */
+ offset = q_level * DRED_LATENT_DIM; /* the same offset is applied to all four table pointers below */
+
+ dred_encode_latents(
+ &enc->ec_encoder,
+ enc->latents_buffer + i * DRED_LATENT_DIM,
+ quant_scales + offset,
+ dead_zone + offset,
+ r + offset,
+ p0 + offset
+ );
+ }
+
+ bytes = (ec_tell(&enc->ec_encoder)+7)/8; /* bit count rounded up to whole bytes */
+ ec_enc_shrink(&enc->ec_encoder, bytes);
+ ec_enc_done(&enc->ec_encoder);
+
+#if 1 /* NOTE(review): enabled debug output, prints on every call */
+ printf("packet size: %d\n", bytes*8);
+#endif
+
+
+#if 0
+ /* trial decoding */
+ float state[24];
+ float features[4 * 20];
+ float latents[80];
+ float zeros[36 - 20] = {0};
+ static FILE *fid;
+ RDOVAEDec *rdovae_dec = DRED_rdovae_create_decoder();
+
+ if (fid == NULL)
+ {
+ fid = fopen("features_last.f32", "wb");
+ }
+
+ /* decode state */
+ ec_enc ec_dec; /* NOTE(review): variable is named ec_dec but declared with type ec_enc */
+ ec_dec_init(&ec_dec, ec_get_buffer(&enc->ec_encoder), bytes);
+ dred_decode_state(&ec_dec, state);
+
+ dred_decode_latents(
+ &ec_dec,
+ latents,
+ quant_scales + offset, /* offset still holds the value from the last iteration of the encoding loop */
+ r + offset,
+ p0 + offset
+ );
+
+ DRED_rdovae_dec_init_states(rdovae_dec, state);
+
+ DRED_rdovae_decode_qframe(rdovae_dec, features, latents);
+
+ DRED_rdovae_destroy_decoder(rdovae_dec);
+
+ fwrite(features + 40, sizeof(float), 20, fid);
+ fwrite(zeros, sizeof(float), 16, fid);
+ fwrite(features + 60, sizeof(float), 20, fid);
+ fwrite(zeros, sizeof(float), 16, fid);
+
+#endif
+
+
} \ No newline at end of file
diff --git a/silk/dred_encoder.h b/silk/dred_encoder.h
index 2c2672f3..f113061c 100644
--- a/silk/dred_encoder.h
+++ b/silk/dred_encoder.h
@@ -14,12 +14,17 @@
#define DRED_SILK_ENCODER_DELAY 79
#define DRED_FRAME_SIZE 160
#define DRED_DFRAME_SIZE (2 * (DRED_FRAME_SIZE))
+#define DRED_MAX_DATA_SIZE 10000
+#define DRED_ENC_Q0 9
+#define DRED_ENC_Q1 15
+#define DRED_NUM_REDUNDANCY_FRAMES 50
typedef struct {
- opus_int16 input_buffer[79 + 2 * 160];
+ opus_int16 input_buffer[DRED_DFRAME_SIZE + DRED_SILK_ENCODER_DELAY];
float feature_buffer[2 * 36];
float latents_buffer[DRED_MAX_FRAMES * DRED_LATENT_DIM];
float state_buffer[24];
+ unsigned char ec_buffer[DRED_MAX_DATA_SIZE];
ec_enc ec_encoder;
LPCNetEncState *lpcnet_enc_state;
RDOVAEEnc *rdovae_enc;