summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-12 16:55:28 -0400
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-17 14:02:59 -0400
commit: bcd006b57f54a183bc91e0d0d37ea3d968a6be33 (patch)
tree: 56d030f18ebb9850f55f2a913e8bd6db000b0178
parent: 2a4f49448f66f664f90edd220b8467d9b06938ab (diff)
download: opus-exp_rnn3.tar.gz
Add RNN for VAD and speech/music classification (ref: exp_rnn3)
Based on two dense layers with a GRU layer in the middle
-rw-r--r-- celt/celt.h 3
-rw-r--r-- src/analysis.c 271
-rw-r--r-- src/analysis.h 15
-rw-r--r-- src/mlp.c 168
-rw-r--r-- src/mlp.h 35
-rw-r--r-- src/mlp_data.c 325
-rw-r--r-- src/opus_encoder.c 11
7 files changed, 473 insertions, 355 deletions
diff --git a/celt/celt.h b/celt/celt.h
index 70175301..f73f29dd 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -59,7 +59,8 @@ typedef struct {
float noisiness;
float activity;
float music_prob;
- float vad_prob;
+ float music_prob_min;
+ float music_prob_max;
int bandwidth;
float activity_probability;
/* Store as Q6 char to save space. */
diff --git a/src/analysis.c b/src/analysis.c
index f4160e4b..1d6dd829 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -50,6 +50,8 @@
#ifndef DISABLE_FLOAT_API
+#define TRANSITION_PENALTY 10
+
static const float dct_table[128] = {
0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
@@ -224,19 +226,22 @@ void tonality_analysis_reset(TonalityAnalysisState *tonal)
/* Clear non-reusable fields. */
char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
- tonal->music_confidence = .9f;
- tonal->speech_confidence = .1f;
}
void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
{
int pos;
int curr_lookahead;
- float psum;
float tonality_max;
float tonality_avg;
int tonality_count;
int i;
+ int pos0;
+ float prob_avg;
+ float prob_count;
+ float prob_min, prob_max;
+ float vad_prob;
+ int mpos, vpos;
pos = tonal->read_pos;
curr_lookahead = tonal->write_pos-tonal->read_pos;
@@ -254,6 +259,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
pos--;
if (pos<0)
pos = DETECT_SIZE-1;
+ pos0 = pos;
OPUS_COPY(info_out, &tonal->info[pos], 1);
tonality_max = tonality_avg = info_out->tonality;
tonality_count = 1;
@@ -270,6 +276,107 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
tonality_count++;
}
info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
+
+ mpos = vpos = pos0;
+ /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
+ ~1 frame delay in the VAD prob. */
+ if (curr_lookahead > 15)
+ {
+ mpos += 5;
+ if (mpos>=DETECT_SIZE)
+ mpos -= DETECT_SIZE;
+ vpos += 1;
+ if (vpos>=DETECT_SIZE)
+ vpos -= DETECT_SIZE;
+ }
+
+ /* The following calculations attempt to minimize a "badness function"
+ for the transition. When switching from speech to music, the badness
+ of switching at frame k is
+ b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+ where
+ v_i is the activity probability (VAD) at frame i,
+ p_i is the music probability at frame i
+ T is the probability threshold for switching
+ S is the penalty for switching during active audio rather than silence
+ the current frame has index i=0
+
+ Rather than apply badness to directly decide when to switch, what we compute
+ instead is the threshold for which the optimal switching point is now. When
+ considering whether to switch now (frame 0) or at frame k, we have:
+ S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+ which gives us:
+ T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i )
+ We take the min threshold across all positive values of k (up to the maximum
+ amount of lookahead we have) to give us the threshold for which the current
+ frame is the optimal switch point.
+
+ The last step is that we need to consider whether we want to switch at all.
+ For that we use the average of the music probability over the entire window.
+ If the threshold is higher than that average we're not going to
+ switch, so we compute a min with the average as well. The result of all these
+ min operations is music_prob_min, which gives the threshold for switching to music
+ if we're currently encoding for speech.
+
+ We do the exact opposite to compute music_prob_max which is used for switching
+ from music to speech.
+ */
+ prob_min = 1.f;
+ prob_max = 0.f;
+ vad_prob = tonal->info[vpos].activity_probability;
+ prob_count = MAX16(.1f, vad_prob);
+ prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob;
+ while (1)
+ {
+ float pos_vad;
+ mpos++;
+ if (mpos==DETECT_SIZE)
+ mpos = 0;
+ if (mpos == tonal->write_pos)
+ break;
+ vpos++;
+ if (vpos==DETECT_SIZE)
+ vpos = 0;
+ if (vpos == tonal->write_pos)
+ break;
+ pos_vad = tonal->info[vpos].activity_probability;
+ prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
+ prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
+ prob_count += MAX16(.1f, pos_vad);
+ prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
+ }
+ info_out->music_prob = prob_avg/prob_count;
+ prob_min = MIN16(prob_avg/prob_count, prob_min);
+ prob_max = MAX16(prob_avg/prob_count, prob_max);
+ prob_min = MAX16(prob_min, 0.f);
+ prob_max = MIN16(prob_max, 1.f);
+
+ /* If we don't have enough look-ahead, do our best to make a decent decision. */
+ if (curr_lookahead < 10)
+ {
+ float pmin, pmax;
+ pmin = prob_min;
+ pmax = prob_max;
+ pos = pos0;
+ /* Look for min/max in the past. */
+ for (i=0;i<IMIN(tonal->count-1, 15);i++)
+ {
+ pos--;
+ if (pos < 0)
+ pos = DETECT_SIZE-1;
+ pmin = MIN16(pmin, tonal->info[pos].music_prob);
+ pmax = MAX16(pmax, tonal->info[pos].music_prob);
+ }
+ /* Bias against switching on active audio. */
+ pmin = MAX16(0.f, pmin - .1f*vad_prob);
+ pmax = MIN16(1.f, pmax + .1f*vad_prob);
+ prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min);
+ prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max);
+ }
+ info_out->music_prob_min = prob_min;
+ info_out->music_prob_max = prob_max;
+
+ /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
tonal->read_subframe += len/(tonal->Fs/400);
while (tonal->read_subframe>=8)
{
@@ -278,21 +385,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
}
if (tonal->read_pos>=DETECT_SIZE)
tonal->read_pos-=DETECT_SIZE;
-
- /* The -1 is to compensate for the delay in the features themselves. */
- curr_lookahead = IMAX(curr_lookahead-1, 0);
-
- psum=0;
- /* Summing the probability of transition patterns that involve music at
- time (DETECT_SIZE-curr_lookahead-1) */
- for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
- psum += tonal->pmusic[i];
- for (;i<DETECT_SIZE;i++)
- psum += tonal->pspeech[i];
- psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
- /*printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);*/
-
- info_out->music_prob = psum;
}
static const float std_feature_bias[9] = {
@@ -352,6 +444,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
float band_log2[NB_TBANDS+1];
float leakage_from[NB_TBANDS+1];
float leakage_to[NB_TBANDS+1];
+ float layer_out[MAX_NEURONS];
SAVE_STACK;
alpha = 1.f/IMIN(10, 1+tonal->count);
@@ -368,12 +461,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
offset = 3*offset/2;
}
- if (tonal->count<4) {
- if (tonal->application == OPUS_APPLICATION_VOIP)
- tonal->music_prob = .1f;
- else
- tonal->music_prob = .625f;
- }
kfft = celt_mode->mdct.kfft[0];
if (tonal->count==0)
tonal->mem_fill = 240;
@@ -761,139 +848,17 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
features[23] = info->tonality_slope + 0.069216f;
features[24] = tonal->lowECount - 0.067930f;
- mlp_process(&net, features, frame_probs);
- frame_probs[0] = .5f*(frame_probs[0]+1);
- /* Curve fitting between the MLP probability and the actual probability */
- /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
- /* Probability of active audio (as opposed to silence) */
- frame_probs[1] = .5f*frame_probs[1]+.5f;
- frame_probs[1] *= frame_probs[1];
+ compute_dense(&layer0, layer_out, features);
+ compute_gru(&layer1, tonal->rnn_state, layer_out);
+ compute_dense(&layer2, frame_probs, tonal->rnn_state);
/* Probability of speech or music vs noise */
info->activity_probability = frame_probs[1];
+ /* It seems like the RNN tends to have a bias towards speech and this
+ warping of the probabilities compensates for it. */
+ info->music_prob = frame_probs[0] * (2 - frame_probs[0]);
- /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
- {
- /* Probability of state transition */
- float tau;
- /* Represents independence of the MLP probabilities, where
- beta=1 means fully independent. */
- float beta;
- /* Denormalized probability of speech (p0) and music (p1) after update */
- float p0, p1;
- /* Probabilities for "all speech" and "all music" */
- float s0, m0;
- /* Probability sum for renormalisation */
- float psum;
- /* Instantaneous probability of speech and music, with beta pre-applied. */
- float speech0;
- float music0;
- float p, q;
-
- /* More silence transitions for speech than for music. */
- tau = .001f*tonal->music_prob + .01f*(1-tonal->music_prob);
- p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
- q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
- beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
- /* p0 and p1 are the probabilities of speech and music at this frame
- using only information from previous frame and applying the
- state transition model */
- p0 = (1-tonal->vad_prob)*(1-tau) + tonal->vad_prob *tau;
- p1 = tonal->vad_prob *(1-tau) + (1-tonal->vad_prob)*tau;
- /* We apply the current probability with exponent beta to work around
- the fact that the probability estimates aren't independent. */
- p0 *= (float)pow(1-frame_probs[1], beta);
- p1 *= (float)pow(frame_probs[1], beta);
- /* Normalise the probabilities to get the Marokv probability of music. */
- tonal->vad_prob = p1/(p0+p1);
- info->vad_prob = tonal->vad_prob;
- /* Consider that silence has a 50-50 probability of being speech or music. */
- frame_probs[0] = tonal->vad_prob*frame_probs[0] + (1-tonal->vad_prob)*.5f;
-
- /* One transition every 3 minutes of active audio */
- tau = .0001f;
- /* Adapt beta based on how "unexpected" the new prob is */
- p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
- q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
- beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
- /* p0 and p1 are the probabilities of speech and music at this frame
- using only information from previous frame and applying the
- state transition model */
- p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
- p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
- /* We apply the current probability with exponent beta to work around
- the fact that the probability estimates aren't independent. */
- p0 *= (float)pow(1-frame_probs[0], beta);
- p1 *= (float)pow(frame_probs[0], beta);
- /* Normalise the probabilities to get the Marokv probability of music. */
- tonal->music_prob = p1/(p0+p1);
- info->music_prob = tonal->music_prob;
-
- /*printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);*/
- /* This chunk of code deals with delayed decision. */
- psum=1e-20f;
- /* Instantaneous probability of speech and music, with beta pre-applied. */
- speech0 = (float)pow(1-frame_probs[0], beta);
- music0 = (float)pow(frame_probs[0], beta);
- if (tonal->count==1)
- {
- if (tonal->application == OPUS_APPLICATION_VOIP)
- tonal->pmusic[0] = .1f;
- else
- tonal->pmusic[0] = .625f;
- tonal->pspeech[0] = 1-tonal->pmusic[0];
- }
- /* Updated probability of having only speech (s0) or only music (m0),
- before considering the new observation. */
- s0 = tonal->pspeech[0] + tonal->pspeech[1];
- m0 = tonal->pmusic [0] + tonal->pmusic [1];
- /* Updates s0 and m0 with instantaneous probability. */
- tonal->pspeech[0] = s0*(1-tau)*speech0;
- tonal->pmusic [0] = m0*(1-tau)*music0;
- /* Propagate the transition probabilities */
- for (i=1;i<DETECT_SIZE-1;i++)
- {
- tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
- tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
- }
- /* Probability that the latest frame is speech, when all the previous ones were music. */
- tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
- /* Probability that the latest frame is music, when all the previous ones were speech. */
- tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
-
- /* Renormalise probabilities to 1 */
- for (i=0;i<DETECT_SIZE;i++)
- psum += tonal->pspeech[i] + tonal->pmusic[i];
- psum = 1.f/psum;
- for (i=0;i<DETECT_SIZE;i++)
- {
- tonal->pspeech[i] *= psum;
- tonal->pmusic [i] *= psum;
- }
- psum = tonal->pmusic[0];
- for (i=1;i<DETECT_SIZE;i++)
- psum += tonal->pspeech[i];
-
- /* Estimate our confidence in the speech/music decisions */
- if (frame_probs[1]>.75)
- {
- if (tonal->music_prob>.9)
- {
- float adapt;
- adapt = 1.f/(++tonal->music_confidence_count);
- tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
- tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
- }
- if (tonal->music_prob<.1)
- {
- float adapt;
- adapt = 1.f/(++tonal->speech_confidence_count);
- tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
- tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
- }
- }
- }
- tonal->last_music = tonal->music_prob>.5f;
+ /*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/
#ifdef MLP_TRAINING
for (i=0;i<25;i++)
printf("%f ", features[i]);
diff --git a/src/analysis.h b/src/analysis.h
index cac51dfa..289c845e 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -30,6 +30,7 @@
#include "celt.h"
#include "opus_private.h"
+#include "mlp.h"
#define NB_FRAMES 8
#define NB_TBANDS 18
@@ -64,28 +65,16 @@ typedef struct {
float mem[32];
float cmean[8];
float std[9];
- float music_prob;
- float vad_prob;
float Etracker;
float lowECount;
int E_count;
- int last_music;
int count;
int analysis_offset;
- /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
- pspeech[0] is the probability that all frames in the window are speech. */
- float pspeech[DETECT_SIZE];
- /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
- pmusic[0] is the probability that all frames in the window are music. */
- float pmusic[DETECT_SIZE];
- float speech_confidence;
- float music_confidence;
- int speech_confidence_count;
- int music_confidence_count;
int write_pos;
int read_pos;
int read_subframe;
float hp_ener_accum;
+ float rnn_state[MAX_NEURONS];
opus_val32 downmix_state[3];
AnalysisInfo info[DETECT_SIZE];
} TonalityAnalysisState;
diff --git a/src/mlp.c b/src/mlp.c
index ff9e50df..26c8941f 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -1,5 +1,5 @@
/* Copyright (c) 2008-2011 Octasic Inc.
- Written by Jean-Marc Valin */
+ 2012-2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -29,42 +29,13 @@
#include "config.h"
#endif
+#include <math.h>
#include "opus_types.h"
#include "opus_defines.h"
-
-#include <math.h>
-#include "mlp.h"
#include "arch.h"
#include "tansig_table.h"
-#define MAX_NEURONS 100
+#include "mlp.h"
-#if 0
-static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
-{
- int i;
- opus_val16 xx; /* Q11 */
- /*double x, y;*/
- opus_val16 dy, yy; /* Q14 */
- /*x = 1.9073e-06*_x;*/
- if (_x>=QCONST32(8,19))
- return QCONST32(1.,14);
- if (_x<=-QCONST32(8,19))
- return -QCONST32(1.,14);
- xx = EXTRACT16(SHR32(_x, 8));
- /*i = lrint(25*x);*/
- i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
- /*x -= .04*i;*/
- xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
- /*x = xx*(1./2048);*/
- /*y = tansig_table[250+i];*/
- yy = tansig_table[250+i];
- /*y = yy*(1./16384);*/
- dy = 16384-MULT16_16_Q14(yy,yy);
- yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
- return yy;
-}
-#else
-/*extern const float tansig_table[501];*/
static OPUS_INLINE float tansig_approx(float x)
{
int i;
@@ -92,54 +63,97 @@ static OPUS_INLINE float tansig_approx(float x)
y = y + x*dy*(1 - y*x);
return sign*y;
}
-#endif
-#if 0
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+static OPUS_INLINE float sigmoid_approx(float x)
{
- int j;
- opus_val16 hidden[MAX_NEURONS];
- const opus_val16 *W = m->weights;
- /* Copy to tmp_in */
- for (j=0;j<m->topo[1];j++)
- {
- int k;
- opus_val32 sum = SHL32(EXTEND32(*W++),8);
- for (k=0;k<m->topo[0];k++)
- sum = MAC16_16(sum, in[k],*W++);
- hidden[j] = tansig_approx(sum);
- }
- for (j=0;j<m->topo[2];j++)
- {
- int k;
- opus_val32 sum = SHL32(EXTEND32(*W++),14);
- for (k=0;k<m->topo[1];k++)
- sum = MAC16_16(sum, hidden[k], *W++);
- out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
- }
+ /* Logistic sigmoid expressed through the tanh approximation:
+    sigmoid(x) = .5 + .5*tanh(x/2). The constants carry an f suffix so the
+    expression is evaluated in float rather than being promoted to double. */
+ return .5f + .5f*tansig_approx(.5f*x);
}
-#else
-void mlp_process(const MLP *m, const float *in, float *out)
+
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
{
- int j;
- float hidden[MAX_NEURONS];
- const float *W = m->weights;
- /* Copy to tmp_in */
- for (j=0;j<m->topo[1];j++)
- {
- int k;
- float sum = *W++;
- for (k=0;k<m->topo[0];k++)
- sum = sum + in[k]**W++;
- hidden[j] = tansig_approx(sum);
- }
- for (j=0;j<m->topo[2];j++)
- {
- int k;
- float sum = *W++;
- for (k=0;k<m->topo[1];k++)
- sum = sum + hidden[k]**W++;
- out[j] = tansig_approx(sum);
- }
+ int i, j;
+ int N, M;
+ int stride;
+ M = layer->nb_inputs;
+ N = layer->nb_neurons;
+ stride = N;
+ for (i=0;i<N;i++)
+ {
+ /* Pre-activation for neuron i: bias plus the weighted sum of the inputs.
+    Weights are indexed as input_weights[j*stride + i] with stride = N, and
+    the sum is multiplied by WEIGHTS_SCALE before the activation below.
+    (This is the plain dense layer; there is no gate here.) */
+ float sum = layer->bias[i];
+ for (j=0;j<M;j++)
+ sum += layer->input_weights[j*stride + i]*input[j];
+ output[i] = WEIGHTS_SCALE*sum;
+ }
+ if (layer->sigmoid) {
+ for (i=0;i<N;i++)
+ output[i] = sigmoid_approx(output[i]);
+ } else {
+ for (i=0;i<N;i++)
+ output[i] = tansig_approx(output[i]);
+ }
+}
+
+void compute_gru(const GRULayer *gru, float *state, const float *input)
+{
+ int i, j;
+ int N, M;
+ int stride;
+ float z[MAX_NEURONS];
+ float r[MAX_NEURONS];
+ float h[MAX_NEURONS];
+ M = gru->nb_inputs;
+ N = gru->nb_neurons;
+ stride = 3*N;
+ for (i=0;i<N;i++)
+ {
+ /* Compute update gate z[i]: sigmoid of the scaled sum of bias[i], the
+    input contribution and the recurrent (previous state) contribution.
+    Each weight row holds 3*N entries (stride); the update gate reads
+    offsets [0, N) of every row. */
+ float sum = gru->bias[i];
+ for (j=0;j<M;j++)
+ sum += gru->input_weights[j*stride + i]*input[j];
+ for (j=0;j<N;j++)
+ sum += gru->recurrent_weights[j*stride + i]*state[j];
+ z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+ }
+ for (i=0;i<N;i++)
+ {
+ /* Compute reset gate r[i]: same form as the update gate, but reading
+    bias[N + i] and offsets [N, 2*N) of every weight row. */
+ float sum = gru->bias[N + i];
+ for (j=0;j<M;j++)
+ sum += gru->input_weights[N + j*stride + i]*input[j];
+ for (j=0;j<N;j++)
+ sum += gru->recurrent_weights[N + j*stride + i]*state[j];
+ r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+ }
+ for (i=0;i<N;i++)
+ {
+ /* Compute output h[i]: the candidate is tansig of the scaled sum using
+    bias[2*N + i] and offsets [2*N, 3*N), with each previous state value
+    multiplied by its reset gate r[j]. The result blends the old state and
+    the candidate using z[i]. It is written to h[] rather than state[] so
+    that every neuron still reads the previous state; state[] is only
+    overwritten in the final loop. */
+ float sum = gru->bias[2*N + i];
+ for (j=0;j<M;j++)
+ sum += gru->input_weights[2*N + j*stride + i]*input[j];
+ for (j=0;j<N;j++)
+ sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j];
+ h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum);
+ }
+ for (i=0;i<N;i++)
+ state[i] = h[i];
+}
+
+#if 0
+int main() {
+ float state[12] = {0};
+ float input[25];
+ float out0[16];
+ float out[2];
+ while (1)
+ {
+ int i;
+ for (i=0;i<25;i++) scanf("%f", &input[i]);
+ if (feof(stdin)) break;
+ compute_dense(&layer0, out0, input);
+ compute_gru(&layer1, state, out0);
+ compute_dense(&layer2, out, state);
+ printf("%f %f\n", out[0], out[1]);
+ }
}
#endif
diff --git a/src/mlp.h b/src/mlp.h
index 618e246e..e3d1e9e5 100644
--- a/src/mlp.h
+++ b/src/mlp.h
@@ -1,5 +1,4 @@
-/* Copyright (c) 2008-2011 Octasic Inc.
- Written by Jean-Marc Valin */
+/* Copyright (c) 2017 Jean-Marc Valin */
/*
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -28,16 +27,34 @@
#ifndef _MLP_H_
#define _MLP_H_
-#include "arch.h"
+#include "opus_types.h"
+
+#define WEIGHTS_SCALE (1.f/8192)
+
+#define MAX_NEURONS 20
typedef struct {
- int layers;
- const int *topo;
- const float *weights;
-} MLP;
+ const opus_int16 *bias;
+ const opus_int16 *input_weights;
+ int nb_inputs;
+ int nb_neurons;
+ int sigmoid;
+} DenseLayer;
+
+typedef struct {
+ const opus_int16 *bias;
+ const opus_int16 *input_weights;
+ const opus_int16 *recurrent_weights;
+ int nb_inputs;
+ int nb_neurons;
+} GRULayer;
+
+extern const DenseLayer layer0;
+extern const GRULayer layer1;
+extern const DenseLayer layer2;
-extern const MLP net;
+void compute_dense(const DenseLayer *layer, float *output, const float *input);
-void mlp_process(const MLP *m, const float *in, float *out);
+void compute_gru(const GRULayer *gru, float *state, const float *input);
#endif /* _MLP_H_ */
diff --git a/src/mlp_data.c b/src/mlp_data.c
index a819880b..5ddc94d5 100644
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -1,112 +1,235 @@
+/*This file is automatically generated from a Keras model*/
+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include "mlp.h"
-/* RMS error was 0.280492, seed was 1480478173 */
-/* 0.005976 0.031821 (0.280494 0.280492) done */
+static const opus_int16 layer0_weights[400] = {
+ 622, 853, -153, 75, -68, -498, -1936, -291,
+ -60, -293, 880, 977, -492, 245, -1111, -1622,
+ -6366, -362, 91, -1764, 1064, -1579, -2406, 696,
+ 216, -850, 316, -4033, -498, -2667, 509, 61,
+ 5334, -561, 1022, -3855, -228, -1117, -266, 326,
+ -1669, 262, 2970, 1810, -2451, -3331, -4970, -617,
+ 2669, 743, 717, 1942, 2858, 253, -2397, 1525,
+ -1665, -919, -945, -3356, 1598, 469, -5746, 1111,
+ -1328, 1331, -140, -1067, -4318, 461, 2235, 702,
+ 905, -45, -734, 779, -2457, -4860, -16, 979,
+ -1769, -1167, -1998, 1009, -6205, -2645, -2309, 2178,
+ 1951, 1433, -1456, 1238, -1195, 4550, -587, -1215,
+ -2388, 4203, 1051, 1118, -1861, 3513, -355, 1787,
+ 3133, -466, 4455, 1794, -167, -3224, 3442, 1458,
+ -9313, 414, -4165, -872, 2574, -3401, -5647, -861,
+ 2817, 1313, 192, 2431, 293, -1737, 354, -3257,
+ 1475, 2711, -991, -2767, 2806, 210, 964, 1269,
+ 2238, -385, 901, -1201, 1182, -4113, 861, -1525,
+ -6256, -12, -62, 1465, 1034, 595, -827, -849,
+ 1012, -1290, -2396, -2684, -503, 2473, -1457, 1528,
+ -2172, 2742, -972, -1949, -4060, -3066, -410, -779,
+ -594, 373, 1823, 197, -621, -191, -3124, -4822,
+ -2073, 351, -1115, 2442, -44, 172, -131, -1216,
+ 875, 94, 4502, 1186, 1008, 698, 351, 160,
+ -506, -1202, 1255, -1411, 1864, -2380, -332, -42,
+ 19, 1521, -2319, 634, 3691, 150, -1300, 2018,
+ 2745, 1845, 138, 1121, -430, 3005, 474, 1349,
+ -1484, -3281, 2309, 1758, 2206, 1506, -267, -187,
+ 2478, 6407, -1708, -1994, 741, 2246, -3388, -552,
+ 239, -559, 130, 854, 2832, -463, 304, 5351,
+ -1417, -1113, -5, -1782, 154, 1314, 1410, 284,
+ 1825, -383, 679, -2209, -946, -1933, -1300, 830,
+ 876, 1313, 1328, 1508, -301, 3985, -2731, 697,
+ -2527, -2002, -834, -236, 2619, 2201, -1857, -610,
+ -951, 1685, -1413, -4944, 1479, 2184, -4672, 172,
+ 39, 2138, 207, -509, 2, -364, -3368, 6137,
+ 483, 4936, -7439, -4670, -1214, -3259, 2538, -5904,
+ -166, -3714, -788, 1445, 6256, 908, 941, 6981,
+ -593, 1114, 2186, -2218, -348, -2502, 1961, 1182,
+ -742, 238, 926, 920, -2111, 517, 2210, 191,
+ -3382, -9810, -13597, -7181, 24299, -6002, 8258, 21229,
+ 15072, -19057, -3613, 14832, -15021, 12016, -3219, -11380,
+ -1944, 4180, -6248, -3509, 9254, -619, 6140, 1451,
+ 5216, -7914, -1158, 5757, -2007, 8602, -3177, -452,
+ 3664, -2979, -12114, 1019, -2574, -2650, 2774, -6778,
+ -821, 136, -1717, 665, 7233, -1536, -851, 434,
+ 2075, -14258, 23564, 14664, -15677, 12462, -2884, -8410,
+ -11996, 15482, 7153, -282, 5304, -9404, 12404, -16057,
+ 1198, -127, -7232, 2624, 1463, 6303, 9577, 2998,
+ -12324, 4652, -4785, -3861, -630, -6777, 5040, 3212
+};
+
+static const opus_int16 layer0_bias[16] = {
+ -1246, -4948, 74, -182, 1314, 1022, -968, 2021,
+ -428, 48, -76, -3614, -4573, 264, -335, -3753
+};
+
+static const opus_int16 layer1_weights[576] = {
+ -1111, -1254, 1798, -2602, 3651, 9301, 5408, 1956,
+ 1478, 2304, -963, 320, 2738, 2543, -2005, -1085,
+ -114, -1571, 4910, 1068, 4171, 2313, 1606, 733,
+ -2610, -2959, 2290, -527, -1842, -646, -16, 2005,
+ 283, -1077, -1891, -131, 3992, -3736, -11009, 946,
+ -879, -2992, -728, 1714, -1299, -6849, -7889, 51,
+ 3311, -4404, 3362, -1589, -1069, -414, 2833, 51,
+ -3667, -1329, -444, -3046, 702, -1800, -1539, 2547,
+ 3632, 1717, -1586, -1469, -687, 2218, -236, 49,
+ -311, 1327, -971, -2230, 3053, 2176, 2819, 113,
+ 3450, -8814, -5903, 620, 3764, -2008, -889, 1287,
+ 702, 1576, 8289, 876, -187, -901, -602, 6363,
+ 141, -1538, 1008, -1399, 2652, 2342, -792, -229,
+ 4015, -339, 2396, 2358, -5957, -3011, -9989, -300,
+ -1311, 771, -346, -6502, 747, 1681, -15794, 6796,
+ -1067, 3718, -2932, -3243, -2861, -1526, 3501, 2016,
+ 3428, 1293, 26, -3254, -868, -820, 2181, -1091,
+ -489, -1773, 1598, -2704, 2712, 99, 1321, 72,
+ -2340, 5255, -6217, 2964, 3356, -1230, -3548, -2045,
+ -1352, 795, 3486, -5695, -2230, -1462, -2318, -3059,
+ -2158, 6277, 491, -543, 5419, -4878, -2874, -2366,
+ 974, 1686, -1541, -1632, -2494, 2066, 2744, 1565,
+ -4715, -2288, 653, 78, -1683, 5352, -102, 1683,
+ 4716, -6395, -3046, -629, 1665, 6384, -8447, 2067,
+ -1616, 6815, 2266, -1036, -5038, 2433, -1651, 1100,
+ -3259, 2064, 2361, -2265, 1324, 2891, -314, -2138,
+ -2988, 510, -2769, 2064, 1017, 393, 1768, 1454,
+ -8112, -5234, 5309, 1943, -5209, 7297, 3919, -6962,
+ -2801, 3106, 789, 6443, 1361, -1278, 1161, -4952,
+ 457, -601, -5225, -1984, -1369, 1295, 191, 882,
+ -651, 2795, 1339, 1014, 726, -1006, 3483, 290,
+ -1399, -1251, -2881, -1338, 3136, -5323, 633, -5421,
+ -6290, 3967, 3783, 4605, -2662, -295, -3887, -457,
+ 5213, 3721, 924, -1770, -2616, 3186, -3607, 1911,
+ 130, -3046, -7271, 1173, 5783, 1843, 1085, 3245,
+ -1263, 78, -1060, -1691, -3620, -2132, -209, -580,
+ 1209, -2759, -3882, -5831, -1829, -921, -5332, 1283,
+ -3190, 2349, 1728, -5752, -7430, -6203, 1696, -55,
+ 2174, -2204, 318, 690, -2819, -4307, 1395, 6894,
+ 1441, -1780, 3808, 569, 3798, 928, 1422, -339,
+ -1251, -1287, 2070, 2876, -961, 1005, 7303, 17,
+ -1773, 1397, 319, 3843, 1678, 6099, 6560, 3289,
+ 1865, -638, 732, -2911, 3968, 361, 422, -1089,
+ -1486, 6998, -1845, 2680, 293, 4466, 249, 637,
+ -1471, -1170, -4907, -106, 4637, 542, -2278, 1263,
+ -3205, -3427, -12921, -3277, -1577, -3644, -3593, 2914,
+ 3684, -482, -3260, -3842, -2185, 3918, -3654, -168,
+ -1301, -1121, -303, 1102, -6530, -163, 1887, 2298,
+ -33, -305, -407, -571, -904, 2380, -1370, -589,
+ 636, 851, -22, 1512, -9024, -5379, -653, -4918,
+ -3000, -3675, 3973, -5136, 6238, -3456, -1061, -969,
+ -449, -1220, -3767, -2634, -3361, -757, -3308, 6517,
+ 5625, -1183, -752, -3137, -401, 1344, 3681, -277,
+ 2478, 2315, 788, 3012, -240, 1288, 1235, 1606,
+ 847, 73, 1037, -491, 410, -3203, -1322, 2917,
+ 2233, 5982, -4473, -6050, -4147, 122, -30, -44,
+ -71, -144, -560, 1808, -3543, 1175, 2110, -2488,
+ -1972, -1154, -1688, 2224, -1458, 2123, -937, 2071,
+ 3042, -181, -3693, 1762, -4058, 389, 3015, 3460,
+ -371, -4471, -801, 6941, -1142, 914, -1497, -5451,
+ 1427, 363, -2305, 717, -101, -2243, 787, 2063,
+ 2094, 1753, -4824, -392, 642, -1595, 2284, -355,
+ 723, 704, 4422, 238, -1603, 4658, -261, -1049,
+ -5058, 1302, 8334, 300, 184, 2387, -4650, 920,
+ -1044, 4126, 2278, -1618, -1595, -3917, 3040, -1588,
+ 2545, -554, 4401, 1209, -1611, -4681, 1402, 157,
+ -2734, 1322, 2633, -89, -2124, -3775, -1074, 2343,
+ 653, -2387, -1463, 1026, 1146, 2433, -992, -89,
+ 390, -604, -4066, -3364, 2779, 1317, -3104, -2945,
+ 4261, 8309, 3272, 3126, 897, 1713, -135, 194,
+ -2696, 1554, -1179, -1107, -625, 233, -2899, 1175,
+ 729, 4034, 1992, -1057, -724, 1125, -3964, -1280
+};
+
+static const opus_int16 layer1_recur_weights[432] = {
+ -438, -838, -6192, 5411, -418, 2893, 284, 1692,
+ 724, -6694, 372, 2294, -2420, -986, -181, 3070,
+ -3303, 1708, 2409, 4537, 1035, -2341, 1559, 3677,
+ 6927, 19, 7018, -1246, -6, 764, 1216, 3250,
+ -1130, -4239, 4176, -1841, -364, -11096, 1627, -5613,
+ -5810, -2252, -3298, -4786, -1273, 1114, 4722, 4239,
+ -1604, -848, 534, -472, -3669, -2118, -2768, -1475,
+ 731, 3618, 1301, 262, -1884, 3715, 2816, -397,
+ -2884, -2069, -382, -778, -3494, -5716, 4715, 3827,
+ -5099, 259, -9518, -3708, -768, 600, 6425, -3923,
+ 820, 4019, 2664, 5603, -4372, 1172, -1589, 1831,
+ -874, -2241, 2583, 1217, -5199, -552, 2599, 5865,
+ 4130, 2308, 6881, -3955, 3300, -438, 2953, 2086,
+ -36, -5881, 4261, -737, -1528, -2968, 357, -808,
+ -4266, -5794, -2556, 4370, -3368, -6190, -7920, -3524,
+ -3430, 2304, -394, 3321, 3607, -885, -4667, -4856,
+ -7151, 1654, -1356, -2450, -3054, -2729, -6057, 3589,
+ 2660, 5931, 1632, -1200, -2062, 5428, -5080, -1625,
+ 4027, 258, -871, 2653, 6457, -3976, -1827, 3303,
+ 215, -9023, -6973, 688, 1128, -324, 13, 2964,
+ 1124, 2324, 1648, 1985, -2165, -859, -4202, 2908,
+ -2207, 2688, 314, 5358, 5148, 2579, -73, 248,
+ -1238, 2539, 520, -1776, 3805, 300, -3066, 1107,
+ -2935, 850, 1637, 3337, -406, -8662, -11909, -1224,
+ 5174, 2046, 955, -3673, -140, -1652, -1644, 2844,
+ 2741, 525, -4580, -2051, 2389, 167, -3123, -4217,
+ -3441, 4071, 1916, 6908, -1404, -938, -1956, -3821,
+ -3583, -1661, -9650, -4695, -2647, 3529, -1050, -1390,
+ -941, -8952, -8547, -5131, 1574, 3018, -1347, -3441,
+ 2818, 1877, 922, 203, 1547, -2540, -2669, -3568,
+ 3712, -1858, 1608, 4022, 1949, 1270, 5690, 4952,
+ -2924, -1852, -960, -6592, 4112, -4835, -6366, 947,
+ 1653, 3866, -3543, 424, -1011, -4746, 482, -5315,
+ -1291, -2193, 1034, -2216, -1676, 2701, 854, 2519,
+ 1207, -4291, -2353, -717, 3103, -546, 1223, -4721,
+ -235, -719, 2882, 2164, 866, -1741, -1255, -2969,
+ 4765, -2875, -4220, -3430, -4870, -4859, -2382, -3808,
+ -1145, 1523, -6688, 1423, 331, 824, -3213, 2206,
+ 1176, -6635, 1452, -3581, -4968, 3371, 6670, 478,
+ -896, -1936, -3446, 3845, -2542, -906, -3529, -4821,
+ 6980, 4467, -2353, 3978, 886, -1195, -3932, 3882,
+ 2825, -2174, -3966, 8341, 4275, 8445, -3631, -2451,
+ 4168, -122, -1558, -1961, 1739, -2608, -1198, -1021,
+ -3015, 2149, -3997, -1421, -5459, -33, -4203, 3328,
+ 12, 3219, 3345, 1329, 3197, 4859, -2998, 1177,
+ -2311, 4629, -5004, 513, 4744, 5323, 8186, -269,
+ 5114, -8890, -1964, 7982, -399, -1038, -1705, 777,
+ -326, -1578, 3215, 2023, -1201, 1188, -1852, 3234,
+ 1091, 1777, 3782, -1820, -2942, -954, -910, -1606,
+ 2469, -3312, 3235, 2541, -2422, -2059, 707, -1015,
+ -7480, -2569, -4303, -6153, -3864, 8265, 1891, 2087,
+ -1127, 1155, -2118, -3621, -3438, 1199, 1071, -1461,
+ -2744, 2638, 3131, 518, -434, 7176, -2115, -527,
+ -1903, -1662, -2805, -5871, 2314, -2244, 2819, 7768
+};
+
+static const opus_int16 layer1_bias[36] = {
+ 3484, 1686, 8617, 3821, 2768, 4548, 5706, 5368,
+ 1998, 8007, 4605, 8417, 3054, 1436, 4327, 2667,
+ 913, 4302, 1496, 1808, 883, 922, -415, 4419,
+ 1156, -2037, 1373, -1083, 323, 1726, -668, -59,
+ -866, -3, -662, -2456
+};
+
+static const opus_int16 layer2_weights[24] = {
+ 10570, 495, -6157, -20216, 8597, -3977, -23140, 5295,
+ -2893, 18700, 997, 8626, 2902, 434, -1866, 9536,
+ -830, -15077, -11656, 3090, 18331, 4166, -4320, -9123
+};
-static const float weights[450] = {
+static const opus_int16 layer2_bias[2] = {
+ -1526, 7868
+};
-/* hidden layer */
--0.514624f, 0.0234227f, -0.14329f, -0.0878216f, -0.00187827f,
--0.0257443f, 0.108524f, 0.00333881f, 0.00585017f, -0.0246132f,
-0.142723f, -0.00436494f, 0.0101354f, -0.11124f, -0.0809367f,
--0.0750772f, 0.0295524f, 0.00823944f, 0.150392f, 0.0320876f,
--0.0710564f, -1.43818f, 0.652076f, 0.0650744f, -1.54821f,
-0.168949f, -1.92724f, 0.0517976f, -0.0670737f, -0.0690121f,
-0.00247528f, -0.0522024f, 0.0631368f, 0.0532776f, 0.047751f,
--0.011715f, 0.142374f, -0.0290885f, -0.279263f, -0.433499f,
--0.0795174f, -0.380458f, -0.051263f, 0.218537f, -0.322478f,
-1.06667f, -0.104607f, -4.70108f, 0.312037f, 0.277397f,
--2.71859f, 1.70037f, -0.141845f, 0.0115618f, 0.0629883f,
-0.0403871f, 0.0139428f, -0.00430733f, -0.0429038f, -0.0590318f,
--0.0501526f, -0.0284802f, -0.0415686f, -0.0438999f, 0.0822666f,
-0.197194f, 0.0363275f, -0.0584307f, 0.0752364f, -0.0799796f,
--0.146275f, 0.161661f, -0.184585f, 0.145568f, 0.442823f,
-1.61221f, 1.11162f, 2.62177f, -2.482f, -0.112599f,
--0.110366f, -0.140794f, -0.181694f, 0.0648674f, 0.0842248f,
-0.0933993f, 0.150122f, 0.129171f, 0.176848f, 0.141758f,
--0.271822f, 0.235113f, 0.0668579f, -0.433957f, 0.113633f,
--0.169348f, -1.40091f, 0.62861f, -0.134236f, 0.402173f,
-1.86373f, 1.53998f, -4.32084f, 0.735343f, 0.800214f,
--0.00968415f, 0.0425904f, 0.0196811f, -0.018426f, -0.000343953f,
--0.00416389f, 0.00111558f, 0.0173069f, -0.00998596f, -0.025898f,
-0.00123764f, -0.00520373f, -0.0565033f, 0.0637394f, 0.0051213f,
-0.0221361f, 0.00819962f, -0.0467061f, -0.0548258f, -0.00314063f,
--1.18332f, 1.88091f, -0.41148f, -2.95727f, -0.521449f,
--0.271641f, 0.124946f, -0.0532936f, 0.101515f, 0.000208564f,
--0.0488748f, 0.0642388f, -0.0383848f, 0.0135046f, -0.0413592f,
--0.0326402f, -0.0137421f, -0.0225219f, -0.0917294f, -0.277759f,
--0.185418f, 0.0471128f, -0.125879f, 0.262467f, -0.212794f,
--0.112931f, -1.99885f, -0.404787f, 0.224402f, 0.637962f,
--0.27808f, -0.0723953f, -0.0537655f, -0.0336359f, -0.0906601f,
--0.0641309f, -0.0713542f, 0.0524317f, 0.00608819f, 0.0754101f,
--0.0488401f, -0.00671865f, 0.0418239f, 0.0536284f, -0.132639f,
-0.0267648f, -0.248432f, -0.0104153f, 0.035544f, -0.212753f,
--0.302895f, -0.0357854f, 0.376838f, 0.597025f, -0.664647f,
-0.268422f, -0.376772f, -1.05472f, 0.0144178f, 0.179122f,
-0.0360155f, 0.220262f, -0.0056381f, 0.0317197f, 0.0621066f,
--0.00779298f, 0.00789378f, 0.00350605f, 0.0104809f, 0.0362871f,
--0.157708f, -0.0659779f, -0.0926278f, 0.00770791f, 0.0631621f,
-0.0817343f, -0.424295f, -0.0437727f, -0.24251f, 0.711217f,
--0.736455f, -2.194f, -0.107612f, -0.175156f, -0.0366573f,
--0.0123156f, -0.0628516f, -0.0218977f, -0.00693699f, 0.00695185f,
-0.00507362f, 0.00359334f, 0.0052661f, 0.035561f, 0.0382701f,
-0.0342179f, -0.00790271f, -0.0170925f, 0.047029f, 0.0197362f,
--0.0153435f, 0.0644152f, -0.36862f, -0.0674876f, -2.82672f,
-1.34122f, -0.0788029f, -3.47792f, 0.507246f, -0.816378f,
--0.0142383f, -0.127349f, -0.106926f, -0.0359524f, 0.105045f,
-0.291554f, 0.195413f, 0.0866214f, -0.066577f, -0.102188f,
-0.0979466f, -0.12982f, 0.400181f, -0.409336f, -0.0593326f,
--0.0656203f, -0.204474f, 0.179802f, 0.000509084f, 0.0995954f,
--2.377f, -0.686359f, 0.934861f, 1.10261f, 1.3901f,
--4.33616f, -0.00264017f, 0.00713045f, 0.106264f, 0.143726f,
--0.0685305f, -0.054656f, -0.0176725f, -0.0772669f, -0.0264526f,
--0.0103824f, -0.0269872f, -0.00687f, 0.225804f, 0.407751f,
--0.0612611f, -0.0576863f, -0.180131f, -0.222772f, -0.461742f,
-0.335236f, 1.03399f, 4.24112f, -0.345796f, -0.594549f,
--76.1407f, -0.265276f, 0.0507719f, 0.0643044f, 0.0384832f,
-0.0424459f, -0.0387817f, -0.0235996f, -0.0740556f, -0.0270029f,
-0.00882177f, -0.0552371f, -0.00485851f, 0.314295f, 0.360431f,
--0.0787085f, 0.110355f, -0.415958f, -0.385088f, -0.272224f,
--1.55108f, -0.141848f, 0.448877f, -0.563447f, -2.31403f,
--0.120077f, -1.49918f, -0.817726f, -0.0495854f, -0.0230782f,
--0.0224014f, 0.117076f, 0.0393216f, 0.051997f, 0.0330763f,
--0.110796f, 0.0211117f, -0.0197258f, 0.0187461f, 0.0125183f,
-0.14876f, 0.0920565f, -0.342475f, 0.135272f, -0.168155f,
--0.033423f, -0.0604611f, -0.128835f, 0.664947f, -0.144997f,
-2.27649f, 1.28663f, 0.841217f, -2.42807f, 0.0230471f,
-0.226709f, -0.0374803f, 0.155436f, 0.0400342f, -0.184686f,
-0.128488f, -0.0939518f, -0.0578559f, 0.0265967f, -0.0999322f,
--0.0322768f, -0.322994f, -0.189371f, -0.738069f, -0.0754914f,
-0.214717f, -0.093728f, -0.695741f, 0.0899298f, -2.06188f,
--0.273719f, -0.896977f, 0.130553f, 0.134638f, 1.29355f,
-0.00520749f, -0.0324224f, 0.00530451f, 0.0192385f, 0.00328708f,
-0.0250838f, 0.0053365f, -0.0177321f, 0.00618789f, 0.00525364f,
-0.00104596f, -0.0360459f, 0.0402403f, -0.0406351f, 0.0136883f,
-0.0880722f, -0.0197449f, 0.089938f, 0.0100456f, -0.0475638f,
--0.73267f, 0.037433f, -0.146551f, -0.230221f, -3.06489f,
--1.40194f, 0.0198483f, 0.0397953f, -0.0190239f, 0.0470715f,
--0.131363f, -0.191721f, -0.0176224f, -0.0480352f, -0.221799f,
--0.26794f, -0.0292615f, 0.0612127f, -0.129877f, 0.00628332f,
--0.085918f, 0.0175379f, 0.0541011f, -0.0810874f, -0.380809f,
--0.222056f, -0.508859f, -0.473369f, 0.484958f, -2.28411f,
-0.0139516f,
-/* output layer */
-3.90017f, 1.71789f, -1.43372f, -2.70839f, 1.77107f,
-5.48006f, 1.44661f, 2.01134f, -1.88383f, -3.64958f,
--1.26351f, 0.779421f, 2.11357f, 3.10409f, 1.68846f,
--4.46197f, -1.61455f, 3.59832f, 2.43531f, -1.26458f,
-0.417941f, 1.47437f, 2.16635f, -1.909f, -0.828869f,
-1.38805f, -2.67975f, -0.110044f, 1.95596f, 0.697931f,
--0.313226f, -0.889315f, 0.283236f, 0.946102f, };
+const DenseLayer layer0 = {
+ layer0_bias,
+ layer0_weights,
+ 25, 16, 0
+};
-static const int topo[3] = {25, 16, 2};
+const GRULayer layer1 = {
+ layer1_bias,
+ layer1_weights,
+ layer1_recur_weights,
+ 16, 12
+};
-const MLP net = {
- 3,
- topo,
- weights
+const DenseLayer layer2 = {
+ layer2_bias,
+ layer2_weights,
+ 12, 2, 1
};
+
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 3770fc64..0494170f 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1189,7 +1189,16 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
{
int analysis_bandwidth;
if (st->signal_type == OPUS_AUTO)
- st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
+ {
+ float prob;
+ if (st->prev_mode == 0)
+ prob = analysis_info.music_prob;
+ else if (st->prev_mode == MODE_CELT_ONLY)
+ prob = analysis_info.music_prob_max;
+ else
+ prob = analysis_info.music_prob_min;
+ st->voice_ratio = (int)floor(.5+100*(1-prob));
+ }
analysis_bandwidth = analysis_info.bandwidth;
if (analysis_bandwidth<=12)