Add RNN for VAD and speech/music classification [exp_rnn3]

Based on two dense layers with a GRU layer in the middle
author: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-12 16:55:28 -0400
committer: Jean-Marc Valin <jmvalin@jmvalin.ca> 2017-07-17 14:02:59 -0400
commit: bcd006b57f54a183bc91e0d0d37ea3d968a6be33 (patch)
tree: 56d030f18ebb9850f55f2a913e8bd6db000b0178
parent: 2a4f49448f66f664f90edd220b8467d9b06938ab (diff)
download: opus-exp_rnn3.tar.gz
7 files changed, 473 insertions, 355 deletions
diff --git a/celt/celt.h b/celt/celt.h
index 70175301..f73f29dd 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -59,7 +59,8 @@ typedef struct {
    float noisiness;
    float activity;
    float music_prob;
-   float vad_prob;
+   float music_prob_min;
+   float music_prob_max;
    int   bandwidth;
    float activity_probability;
    /* Store as Q6 char to save space. */
diff --git a/src/analysis.c b/src/analysis.c
index f4160e4b..1d6dd829 100644
--- a/src/analysis.c
+++ b/src/analysis.c
@@ -50,6 +50,8 @@
 
 #ifndef DISABLE_FLOAT_API
 
+#define TRANSITION_PENALTY 10
+
 static const float dct_table[128] = {
         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
         0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f, 0.250000f,
@@ -224,19 +226,22 @@ void tonality_analysis_reset(TonalityAnalysisState *tonal)
   /* Clear non-reusable fields. */
   char *start = (char*)&tonal->TONALITY_ANALYSIS_RESET_START;
   OPUS_CLEAR(start, sizeof(TonalityAnalysisState) - (start - (char*)tonal));
-  tonal->music_confidence = .9f;
-  tonal->speech_confidence = .1f;
 }
 
 void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int len)
 {
    int pos;
    int curr_lookahead;
-   float psum;
    float tonality_max;
    float tonality_avg;
    int tonality_count;
    int i;
+   int pos0;
+   float prob_avg;
+   float prob_count;
+   float prob_min, prob_max;
+   float vad_prob;
+   int mpos, vpos;
 
    pos = tonal->read_pos;
    curr_lookahead = tonal->write_pos-tonal->read_pos;
@@ -254,6 +259,7 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
       pos--;
    if (pos<0)
       pos = DETECT_SIZE-1;
+   pos0 = pos;
    OPUS_COPY(info_out, &tonal->info[pos], 1);
    tonality_max = tonality_avg = info_out->tonality;
    tonality_count = 1;
@@ -270,6 +276,107 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
       tonality_count++;
    }
    info_out->tonality = MAX32(tonality_avg/tonality_count, tonality_max-.2f);
+
+   mpos = vpos = pos0;
+   /* If we have enough look-ahead, compensate for the ~5-frame delay in the music prob and
+      ~1 frame delay in the VAD prob. */
+   if (curr_lookahead > 15)
+   {
+      mpos += 5;
+      if (mpos>=DETECT_SIZE)
+         mpos -= DETECT_SIZE;
+      vpos += 1;
+      if (vpos>=DETECT_SIZE)
+         vpos -= DETECT_SIZE;
+   }
+
+   /* The following calculations attempt to minimize a "badness function"
+      for the transition. When switching from speech to music, the badness
+      of switching at frame k is
+      b_k = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      where
+      v_i is the activity probability (VAD) at frame i,
+      p_i is the music probability at frame i
+      T is the probability threshold for switching
+      S is the penalty for switching during active audio rather than silence
+      the current frame has index i=0
+
+      Rather than applying the badness directly to decide when to switch, we compute
+      instead the threshold for which the optimal switching point is now. When
+      considering whether to switch now (frame 0) or at frame k, we have:
+      S*v_0 = S*v_k + \sum_{i=0}^{k-1} v_i*(p_i - T)
+      which gives us:
+      T = ( \sum_{i=0}^{k-1} v_i*p_i + S*(v_k-v_0) ) / ( \sum_{i=0}^{k-1} v_i )
+      We take the min threshold across all positive values of k (up to the maximum
+      amount of lookahead we have) to give us the threshold for which the current
+      frame is the optimal switch point.
+
+      The last step is that we need to consider whether we want to switch at all.
+      For that we use the average of the music probability over the entire window.
+      If the threshold is higher than that average we're not going to
+      switch, so we compute a min with the average as well. The result of all these
+      min operations is music_prob_min, which gives the threshold for switching to music
+      if we're currently encoding for speech.
+
+      We do the exact opposite to compute music_prob_max which is used for switching
+      from music to speech.
+    */
+   prob_min = 1.f;
+   prob_max = 0.f;
+   vad_prob = tonal->info[vpos].activity_probability;
+   prob_count = MAX16(.1f, vad_prob);
+   prob_avg = MAX16(.1f, vad_prob)*tonal->info[mpos].music_prob;
+   while (1)
+   {
+      float pos_vad;
+      mpos++;
+      if (mpos==DETECT_SIZE)
+         mpos = 0;
+      if (mpos == tonal->write_pos)
+         break;
+      vpos++;
+      if (vpos==DETECT_SIZE)
+         vpos = 0;
+      if (vpos == tonal->write_pos)
+         break;
+      pos_vad = tonal->info[vpos].activity_probability;
+      prob_min = MIN16((prob_avg - TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_min);
+      prob_max = MAX16((prob_avg + TRANSITION_PENALTY*(vad_prob - pos_vad))/prob_count, prob_max);
+      prob_count += MAX16(.1f, pos_vad);
+      prob_avg += MAX16(.1f, pos_vad)*tonal->info[mpos].music_prob;
+   }
+   info_out->music_prob = prob_avg/prob_count;
+   prob_min = MIN16(prob_avg/prob_count, prob_min);
+   prob_max = MAX16(prob_avg/prob_count, prob_max);
+   prob_min = MAX16(prob_min, 0.f);
+   prob_max = MIN16(prob_max, 1.f);
+
+   /* If we don't have enough look-ahead, do our best to make a decent decision. */
+   if (curr_lookahead < 10)
+   {
+      float pmin, pmax;
+      pmin = prob_min;
+      pmax = prob_max;
+      pos = pos0;
+      /* Look for min/max in the past. */
+      for (i=0;i<IMIN(tonal->count-1, 15);i++)
+      {
+         pos--;
+         if (pos < 0)
+            pos = DETECT_SIZE-1;
+         pmin = MIN16(pmin, tonal->info[pos].music_prob);
+         pmax = MAX16(pmax, tonal->info[pos].music_prob);
+      }
+      /* Bias against switching on active audio. */
+      pmin = MAX16(0.f, pmin - .1f*vad_prob);
+      pmax = MIN16(1.f, pmax + .1f*vad_prob);
+      prob_min += (1.f-.1f*curr_lookahead)*(pmin - prob_min);
+      prob_max += (1.f-.1f*curr_lookahead)*(pmax - prob_max);
+   }
+   info_out->music_prob_min = prob_min;
+   info_out->music_prob_max = prob_max;
+
+   /* printf("%f %f %f %f %f\n", prob_min, prob_max, prob_avg/prob_count, vad_prob, info_out->music_prob); */
    tonal->read_subframe += len/(tonal->Fs/400);
    while (tonal->read_subframe>=8)
    {
@@ -278,21 +385,6 @@ void tonality_get_info(TonalityAnalysisState *tonal, AnalysisInfo *info_out, int
    }
    if (tonal->read_pos>=DETECT_SIZE)
       tonal->read_pos-=DETECT_SIZE;
-
-   /* The -1 is to compensate for the delay in the features themselves. */
-   curr_lookahead = IMAX(curr_lookahead-1, 0);
-
-   psum=0;
-   /* Summing the probability of transition patterns that involve music at
-      time (DETECT_SIZE-curr_lookahead-1) */
-   for (i=0;i<DETECT_SIZE-curr_lookahead;i++)
-      psum += tonal->pmusic[i];
-   for (;i<DETECT_SIZE;i++)
-      psum += tonal->pspeech[i];
-   psum = psum*tonal->music_confidence + (1-psum)*tonal->speech_confidence;
-   /*printf("%f %f %f %f %f\n", psum, info_out->music_prob, info_out->vad_prob, info_out->activity_probability, info_out->tonality);*/
-
-   info_out->music_prob = psum;
 }
 
 static const float std_feature_bias[9] = {
@@ -352,6 +444,7 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     float band_log2[NB_TBANDS+1];
     float leakage_from[NB_TBANDS+1];
     float leakage_to[NB_TBANDS+1];
+    float layer_out[MAX_NEURONS];
     SAVE_STACK;
 
     alpha = 1.f/IMIN(10, 1+tonal->count);
@@ -368,12 +461,6 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
        offset = 3*offset/2;
     }
 
-    if (tonal->count<4) {
-       if (tonal->application == OPUS_APPLICATION_VOIP)
-          tonal->music_prob = .1f;
-       else
-          tonal->music_prob = .625f;
-    }
     kfft = celt_mode->mdct.kfft[0];
     if (tonal->count==0)
        tonal->mem_fill = 240;
@@ -761,139 +848,17 @@ static void tonality_analysis(TonalityAnalysisState *tonal, const CELTMode *celt
     features[23] = info->tonality_slope + 0.069216f;
     features[24] = tonal->lowECount - 0.067930f;
 
-    mlp_process(&net, features, frame_probs);
-    frame_probs[0] = .5f*(frame_probs[0]+1);
-    /* Curve fitting between the MLP probability and the actual probability */
-    /*frame_probs[0] = .01f + 1.21f*frame_probs[0]*frame_probs[0] - .23f*(float)pow(frame_probs[0], 10);*/
-    /* Probability of active audio (as opposed to silence) */
-    frame_probs[1] = .5f*frame_probs[1]+.5f;
-    frame_probs[1] *= frame_probs[1];
+    compute_dense(&layer0, layer_out, features);
+    compute_gru(&layer1, tonal->rnn_state, layer_out);
+    compute_dense(&layer2, frame_probs, tonal->rnn_state);
 
     /* Probability of speech or music vs noise */
     info->activity_probability = frame_probs[1];
+    /* It seems like the RNN tends to have a bias towards speech and this
+       warping of the probabilities compensates for it. */
+    info->music_prob = frame_probs[0] * (2 - frame_probs[0]);
 
-    /*printf("%f %f\n", frame_probs[0], frame_probs[1]);*/
-    {
-       /* Probability of state transition */
-       float tau;
-       /* Represents independence of the MLP probabilities, where
-          beta=1 means fully independent. */
-       float beta;
-       /* Denormalized probability of speech (p0) and music (p1) after update */
-       float p0, p1;
-       /* Probabilities for "all speech" and "all music" */
-       float s0, m0;
-       /* Probability sum for renormalisation */
-       float psum;
-       /* Instantaneous probability of speech and music, with beta pre-applied. */
-       float speech0;
-       float music0;
-       float p, q;
-
-       /* More silence transitions for speech than for music. */
-       tau = .001f*tonal->music_prob + .01f*(1-tonal->music_prob);
-       p = MAX16(.05f,MIN16(.95f,frame_probs[1]));
-       q = MAX16(.05f,MIN16(.95f,tonal->vad_prob));
-       beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       /* p0 and p1 are the probabilities of speech and music at this frame
-          using only information from previous frame and applying the
-          state transition model */
-       p0 = (1-tonal->vad_prob)*(1-tau) +    tonal->vad_prob *tau;
-       p1 =    tonal->vad_prob *(1-tau) + (1-tonal->vad_prob)*tau;
-       /* We apply the current probability with exponent beta to work around
-          the fact that the probability estimates aren't independent. */
-       p0 *= (float)pow(1-frame_probs[1], beta);
-       p1 *= (float)pow(frame_probs[1], beta);
-       /* Normalise the probabilities to get the Marokv probability of music. */
-       tonal->vad_prob = p1/(p0+p1);
-       info->vad_prob = tonal->vad_prob;
-       /* Consider that silence has a 50-50 probability of being speech or music. */
-       frame_probs[0] = tonal->vad_prob*frame_probs[0] + (1-tonal->vad_prob)*.5f;
-
-       /* One transition every 3 minutes of active audio */
-       tau = .0001f;
-       /* Adapt beta based on how "unexpected" the new prob is */
-       p = MAX16(.05f,MIN16(.95f,frame_probs[0]));
-       q = MAX16(.05f,MIN16(.95f,tonal->music_prob));
-       beta = .02f+.05f*ABS16(p-q)/(p*(1-q)+q*(1-p));
-       /* p0 and p1 are the probabilities of speech and music at this frame
-          using only information from previous frame and applying the
-          state transition model */
-       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
-       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
-       /* We apply the current probability with exponent beta to work around
-          the fact that the probability estimates aren't independent. */
-       p0 *= (float)pow(1-frame_probs[0], beta);
-       p1 *= (float)pow(frame_probs[0], beta);
-       /* Normalise the probabilities to get the Marokv probability of music. */
-       tonal->music_prob = p1/(p0+p1);
-       info->music_prob = tonal->music_prob;
-
-       /*printf("%f %f %f %f\n", frame_probs[0], frame_probs[1], tonal->music_prob, tonal->vad_prob);*/
-       /* This chunk of code deals with delayed decision. */
-       psum=1e-20f;
-       /* Instantaneous probability of speech and music, with beta pre-applied. */
-       speech0 = (float)pow(1-frame_probs[0], beta);
-       music0  = (float)pow(frame_probs[0], beta);
-       if (tonal->count==1)
-       {
-          if (tonal->application == OPUS_APPLICATION_VOIP)
-             tonal->pmusic[0] = .1f;
-          else
-             tonal->pmusic[0] = .625f;
-          tonal->pspeech[0] = 1-tonal->pmusic[0];
-       }
-       /* Updated probability of having only speech (s0) or only music (m0),
-          before considering the new observation. */
-       s0 = tonal->pspeech[0] + tonal->pspeech[1];
-       m0 = tonal->pmusic [0] + tonal->pmusic [1];
-       /* Updates s0 and m0 with instantaneous probability. */
-       tonal->pspeech[0] = s0*(1-tau)*speech0;
-       tonal->pmusic [0] = m0*(1-tau)*music0;
-       /* Propagate the transition probabilities */
-       for (i=1;i<DETECT_SIZE-1;i++)
-       {
-          tonal->pspeech[i] = tonal->pspeech[i+1]*speech0;
-          tonal->pmusic [i] = tonal->pmusic [i+1]*music0;
-       }
-       /* Probability that the latest frame is speech, when all the previous ones were music. */
-       tonal->pspeech[DETECT_SIZE-1] = m0*tau*speech0;
-       /* Probability that the latest frame is music, when all the previous ones were speech. */
-       tonal->pmusic [DETECT_SIZE-1] = s0*tau*music0;
-
-       /* Renormalise probabilities to 1 */
-       for (i=0;i<DETECT_SIZE;i++)
-          psum += tonal->pspeech[i] + tonal->pmusic[i];
-       psum = 1.f/psum;
-       for (i=0;i<DETECT_SIZE;i++)
-       {
-          tonal->pspeech[i] *= psum;
-          tonal->pmusic [i] *= psum;
-       }
-       psum = tonal->pmusic[0];
-       for (i=1;i<DETECT_SIZE;i++)
-          psum += tonal->pspeech[i];
-
-       /* Estimate our confidence in the speech/music decisions */
-       if (frame_probs[1]>.75)
-       {
-          if (tonal->music_prob>.9)
-          {
-             float adapt;
-             adapt = 1.f/(++tonal->music_confidence_count);
-             tonal->music_confidence_count = IMIN(tonal->music_confidence_count, 500);
-             tonal->music_confidence += adapt*MAX16(-.2f,frame_probs[0]-tonal->music_confidence);
-          }
-          if (tonal->music_prob<.1)
-          {
-             float adapt;
-             adapt = 1.f/(++tonal->speech_confidence_count);
-             tonal->speech_confidence_count = IMIN(tonal->speech_confidence_count, 500);
-             tonal->speech_confidence += adapt*MIN16(.2f,frame_probs[0]-tonal->speech_confidence);
-          }
-       }
-    }
-    tonal->last_music = tonal->music_prob>.5f;
+    /*printf("%f %f %f\n", frame_probs[0], frame_probs[1], info->music_prob);*/
 #ifdef MLP_TRAINING
     for (i=0;i<25;i++)
        printf("%f ", features[i]);
diff --git a/src/analysis.h b/src/analysis.h
index cac51dfa..289c845e 100644
--- a/src/analysis.h
+++ b/src/analysis.h
@@ -30,6 +30,7 @@
 
 #include "celt.h"
 #include "opus_private.h"
+#include "mlp.h"
 
 #define NB_FRAMES 8
 #define NB_TBANDS 18
@@ -64,28 +65,16 @@ typedef struct {
    float mem[32];
    float cmean[8];
    float std[9];
-   float music_prob;
-   float vad_prob;
    float Etracker;
    float lowECount;
    int E_count;
-   int last_music;
    int count;
    int analysis_offset;
-   /** Probability of having speech for time i to DETECT_SIZE-1 (and music before).
-       pspeech[0] is the probability that all frames in the window are speech. */
-   float pspeech[DETECT_SIZE];
-   /** Probability of having music for time i to DETECT_SIZE-1 (and speech before).
-       pmusic[0] is the probability that all frames in the window are music. */
-   float pmusic[DETECT_SIZE];
-   float speech_confidence;
-   float music_confidence;
-   int speech_confidence_count;
-   int music_confidence_count;
    int write_pos;
    int read_pos;
    int read_subframe;
    float hp_ener_accum;
+   float rnn_state[MAX_NEURONS];
    opus_val32 downmix_state[3];
    AnalysisInfo info[DETECT_SIZE];
 } TonalityAnalysisState;
diff --git a/src/mlp.c b/src/mlp.c
index ff9e50df..26c8941f 100644
--- a/src/mlp.c
+++ b/src/mlp.c
@@ -1,5 +1,5 @@
 /* Copyright (c) 2008-2011 Octasic Inc.
-   Written by Jean-Marc Valin */
+                 2012-2017 Jean-Marc Valin */
 /*
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -29,42 +29,13 @@
 #include "config.h"
 #endif
 
+#include <math.h>
 #include "opus_types.h"
 #include "opus_defines.h"
-
-#include <math.h>
-#include "mlp.h"
 #include "arch.h"
 #include "tansig_table.h"
-#define MAX_NEURONS 100
+#include "mlp.h"
 
-#if 0
-static OPUS_INLINE opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
-{
-    int i;
-    opus_val16 xx; /* Q11 */
-    /*double x, y;*/
-    opus_val16 dy, yy; /* Q14 */
-    /*x = 1.9073e-06*_x;*/
-    if (_x>=QCONST32(8,19))
-        return QCONST32(1.,14);
-    if (_x<=-QCONST32(8,19))
-        return -QCONST32(1.,14);
-    xx = EXTRACT16(SHR32(_x, 8));
-    /*i = lrint(25*x);*/
-    i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
-    /*x -= .04*i;*/
-    xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
-    /*x = xx*(1./2048);*/
-    /*y = tansig_table[250+i];*/
-    yy = tansig_table[250+i];
-    /*y = yy*(1./16384);*/
-    dy = 16384-MULT16_16_Q14(yy,yy);
-    yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
-    return yy;
-}
-#else
-/*extern const float tansig_table[501];*/
 static OPUS_INLINE float tansig_approx(float x)
 {
     int i;
@@ -92,54 +63,97 @@ static OPUS_INLINE float tansig_approx(float x)
     y = y + x*dy*(1 - y*x);
     return sign*y;
 }
-#endif
 
-#if 0
-void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+static OPUS_INLINE float sigmoid_approx(float x)
 {
-    int j;
-    opus_val16 hidden[MAX_NEURONS];
-    const opus_val16 *W = m->weights;
-    /* Copy to tmp_in */
-    for (j=0;j<m->topo[1];j++)
-    {
-        int k;
-        opus_val32 sum = SHL32(EXTEND32(*W++),8);
-        for (k=0;k<m->topo[0];k++)
-            sum = MAC16_16(sum, in[k],*W++);
-        hidden[j] = tansig_approx(sum);
-    }
-    for (j=0;j<m->topo[2];j++)
-    {
-        int k;
-        opus_val32 sum = SHL32(EXTEND32(*W++),14);
-        for (k=0;k<m->topo[1];k++)
-            sum = MAC16_16(sum, hidden[k], *W++);
-        out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
-    }
+   return .5 + .5*tansig_approx(.5*x); /* sigmoid(x) = .5 + .5*tanh(.5*x); assumes tansig_approx approximates tanh -- TODO confirm */
 }
-#else
-void mlp_process(const MLP *m, const float *in, float *out)
+
+void compute_dense(const DenseLayer *layer, float *output, const float *input)
 {
-    int j;
-    float hidden[MAX_NEURONS];
-    const float *W = m->weights;
-    /* Copy to tmp_in */
-    for (j=0;j<m->topo[1];j++)
-    {
-        int k;
-        float sum = *W++;
-        for (k=0;k<m->topo[0];k++)
-            sum = sum + in[k]**W++;
-        hidden[j] = tansig_approx(sum);
-    }
-    for (j=0;j<m->topo[2];j++)
-    {
-        int k;
-        float sum = *W++;
-        for (k=0;k<m->topo[1];k++)
-            sum = sum + hidden[k]**W++;
-        out[j] = tansig_approx(sum);
-    }
+   int i, j;
+   int N, M;
+   int stride;
+   M = layer->nb_inputs;
+   N = layer->nb_neurons;
+   stride = N; /* input_weights holds one run of N weights per input: [j*N + i] is input j -> neuron i */
+   for (i=0;i<N;i++)
+   {
+      /* Bias plus weighted sum of all M inputs for neuron i (a dense layer has no gates). */
+      float sum = layer->bias[i];
+      for (j=0;j<M;j++)
+         sum += layer->input_weights[j*stride + i]*input[j];
+      output[i] = WEIGHTS_SCALE*sum; /* bias and weights are opus_int16; rescale the sum before the activation */
+   }
+   if (layer->sigmoid) {
+      for (i=0;i<N;i++)
+         output[i] = sigmoid_approx(output[i]);
+   } else {
+      for (i=0;i<N;i++)
+         output[i] = tansig_approx(output[i]);
+   }
+}
+
+void compute_gru(const GRULayer *gru, float *state, const float *input)
+{
+   int i, j;
+   int N, M;
+   int stride;
+   float z[MAX_NEURONS]; /* NOTE(review): nothing here checks nb_neurons <= MAX_NEURONS -- confirm for every model */
+   float r[MAX_NEURONS];
+   float h[MAX_NEURONS];
+   M = gru->nb_inputs;
+   N = gru->nb_neurons;
+   stride = 3*N; /* each weight row has 3*N columns: [0,N) update, [N,2N) reset, [2N,3N) output */
+   for (i=0;i<N;i++)
+   {
+      /* Compute update gate z[i] from the input and the previous state. */
+      float sum = gru->bias[i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[j*stride + i]*state[j];
+      z[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++)
+   {
+      /* Compute reset gate r[i]; same form as z but using the columns offset by N. */
+      float sum = gru->bias[N + i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[N + j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[N + j*stride + i]*state[j];
+      r[i] = sigmoid_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++)
+   {
+      /* Compute output: z[i] blends the old state with a tansig candidate (columns offset by 2*N). */
+      float sum = gru->bias[2*N + i];
+      for (j=0;j<M;j++)
+         sum += gru->input_weights[2*N + j*stride + i]*input[j];
+      for (j=0;j<N;j++)
+         sum += gru->recurrent_weights[2*N + j*stride + i]*state[j]*r[j]; /* each state element is scaled by its reset gate first */
+      h[i] = z[i]*state[i] + (1-z[i])*tansig_approx(WEIGHTS_SCALE*sum);
+   }
+   for (i=0;i<N;i++) /* written back only now so every h[i] above was computed from the old state */
+      state[i] = h[i];
+}
+
+#if 0
+int main() { /* Disabled (#if 0) standalone driver: reads 25 features per frame from stdin, prints the 2 network outputs. */
+  float state[12] = {0};
+  float input[25];
+  float out0[16];
+  float out[2];
+  while (1)
+  {
+    int i;
+    for (i=0;i<25;i++) scanf("%f", &input[i]);
+    if (feof(stdin)) break;
+    compute_dense(&layer0, out0, input);
+    compute_gru(&layer1, state, out0);
+    compute_dense(&layer2, out, state);
+    printf("%f %f\n", out[0], out[1]);
+  }
 }
 #endif
diff --git a/src/mlp.h b/src/mlp.h
index 618e246e..e3d1e9e5 100644
--- a/src/mlp.h
+++ b/src/mlp.h
@@ -1,5 +1,4 @@
-/* Copyright (c) 2008-2011 Octasic Inc.
-   Written by Jean-Marc Valin */
+/* Copyright (c) 2017 Jean-Marc Valin */
 /*
    Redistribution and use in source and binary forms, with or without
    modification, are permitted provided that the following conditions
@@ -28,16 +27,34 @@
 #ifndef _MLP_H_
 #define _MLP_H_
 
-#include "arch.h"
+#include "opus_types.h"
+
+#define WEIGHTS_SCALE (1.f/8192)
+
+#define MAX_NEURONS 20
 
 typedef struct {
-    int layers;
-    const int *topo;
-    const float *weights;
-} MLP;
+  const opus_int16 *bias; /* one entry per neuron; compute_dense reads bias[i] for neuron i */
+  const opus_int16 *input_weights; /* read as input_weights[j*nb_neurons + i] for input j, neuron i */
+  int nb_inputs; /* number of input values consumed */
+  int nb_neurons; /* number of output values produced */
+  int sigmoid; /* non-zero: sigmoid_approx activation; zero: tansig_approx */
+} DenseLayer;
+
+typedef struct {
+  const opus_int16 *bias; /* 3*nb_neurons entries: update gate, reset gate, then output */
+  const opus_int16 *input_weights; /* nb_inputs rows of 3*nb_neurons columns (update | reset | output) */
+  const opus_int16 *recurrent_weights; /* nb_neurons rows of 3*nb_neurons columns, same layout, applied to the state */
+  int nb_inputs; /* number of input values consumed per call */
+  int nb_neurons; /* size of the recurrent state; must fit the MAX_NEURONS scratch arrays in compute_gru */
+} GRULayer;
+
+extern const DenseLayer layer0;
+extern const GRULayer layer1;
+extern const DenseLayer layer2;
 
-extern const MLP net;
+void compute_dense(const DenseLayer *layer, float *output, const float *input);
 
-void mlp_process(const MLP *m, const float *in, float *out);
+void compute_gru(const GRULayer *gru, float *state, const float *input);
 
 #endif /* _MLP_H_ */
diff --git a/src/mlp_data.c b/src/mlp_data.c
index a819880b..5ddc94d5 100644
--- a/src/mlp_data.c
+++ b/src/mlp_data.c
@@ -1,112 +1,235 @@
+/*This file is automatically generated from a Keras model*/
+
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
 #include "mlp.h"
 
-/* RMS error was 0.280492, seed was 1480478173 */
-/* 0.005976 0.031821 (0.280494 0.280492) done */
+static const opus_int16 layer0_weights[400] = {
+   622, 853, -153, 75, -68, -498, -1936, -291,
+   -60, -293, 880, 977, -492, 245, -1111, -1622,
+   -6366, -362, 91, -1764, 1064, -1579, -2406, 696,
+   216, -850, 316, -4033, -498, -2667, 509, 61,
+   5334, -561, 1022, -3855, -228, -1117, -266, 326,
+   -1669, 262, 2970, 1810, -2451, -3331, -4970, -617,
+   2669, 743, 717, 1942, 2858, 253, -2397, 1525,
+   -1665, -919, -945, -3356, 1598, 469, -5746, 1111,
+   -1328, 1331, -140, -1067, -4318, 461, 2235, 702,
+   905, -45, -734, 779, -2457, -4860, -16, 979,
+   -1769, -1167, -1998, 1009, -6205, -2645, -2309, 2178,
+   1951, 1433, -1456, 1238, -1195, 4550, -587, -1215,
+   -2388, 4203, 1051, 1118, -1861, 3513, -355, 1787,
+   3133, -466, 4455, 1794, -167, -3224, 3442, 1458,
+   -9313, 414, -4165, -872, 2574, -3401, -5647, -861,
+   2817, 1313, 192, 2431, 293, -1737, 354, -3257,
+   1475, 2711, -991, -2767, 2806, 210, 964, 1269,
+   2238, -385, 901, -1201, 1182, -4113, 861, -1525,
+   -6256, -12, -62, 1465, 1034, 595, -827, -849,
+   1012, -1290, -2396, -2684, -503, 2473, -1457, 1528,
+   -2172, 2742, -972, -1949, -4060, -3066, -410, -779,
+   -594, 373, 1823, 197, -621, -191, -3124, -4822,
+   -2073, 351, -1115, 2442, -44, 172, -131, -1216,
+   875, 94, 4502, 1186, 1008, 698, 351, 160,
+   -506, -1202, 1255, -1411, 1864, -2380, -332, -42,
+   19, 1521, -2319, 634, 3691, 150, -1300, 2018,
+   2745, 1845, 138, 1121, -430, 3005, 474, 1349,
+   -1484, -3281, 2309, 1758, 2206, 1506, -267, -187,
+   2478, 6407, -1708, -1994, 741, 2246, -3388, -552,
+   239, -559, 130, 854, 2832, -463, 304, 5351,
+   -1417, -1113, -5, -1782, 154, 1314, 1410, 284,
+   1825, -383, 679, -2209, -946, -1933, -1300, 830,
+   876, 1313, 1328, 1508, -301, 3985, -2731, 697,
+   -2527, -2002, -834, -236, 2619, 2201, -1857, -610,
+   -951, 1685, -1413, -4944, 1479, 2184, -4672, 172,
+   39, 2138, 207, -509, 2, -364, -3368, 6137,
+   483, 4936, -7439, -4670, -1214, -3259, 2538, -5904,
+   -166, -3714, -788, 1445, 6256, 908, 941, 6981,
+   -593, 1114, 2186, -2218, -348, -2502, 1961, 1182,
+   -742, 238, 926, 920, -2111, 517, 2210, 191,
+   -3382, -9810, -13597, -7181, 24299, -6002, 8258, 21229,
+   15072, -19057, -3613, 14832, -15021, 12016, -3219, -11380,
+   -1944, 4180, -6248, -3509, 9254, -619, 6140, 1451,
+   5216, -7914, -1158, 5757, -2007, 8602, -3177, -452,
+   3664, -2979, -12114, 1019, -2574, -2650, 2774, -6778,
+   -821, 136, -1717, 665, 7233, -1536, -851, 434,
+   2075, -14258, 23564, 14664, -15677, 12462, -2884, -8410,
+   -11996, 15482, 7153, -282, 5304, -9404, 12404, -16057,
+   1198, -127, -7232, 2624, 1463, 6303, 9577, 2998,
+   -12324, 4652, -4785, -3861, -630, -6777, 5040, 3212
+};
+
+static const opus_int16 layer0_bias[16] = {
+   -1246, -4948, 74, -182, 1314, 1022, -968, 2021,
+   -428, 48, -76, -3614, -4573, 264, -335, -3753
+};
+
+static const opus_int16 layer1_weights[576] = {
+   -1111, -1254, 1798, -2602, 3651, 9301, 5408, 1956,
+   1478, 2304, -963, 320, 2738, 2543, -2005, -1085,
+   -114, -1571, 4910, 1068, 4171, 2313, 1606, 733,
+   -2610, -2959, 2290, -527, -1842, -646, -16, 2005,
+   283, -1077, -1891, -131, 3992, -3736, -11009, 946,
+   -879, -2992, -728, 1714, -1299, -6849, -7889, 51,
+   3311, -4404, 3362, -1589, -1069, -414, 2833, 51,
+   -3667, -1329, -444, -3046, 702, -1800, -1539, 2547,
+   3632, 1717, -1586, -1469, -687, 2218, -236, 49,
+   -311, 1327, -971, -2230, 3053, 2176, 2819, 113,
+   3450, -8814, -5903, 620, 3764, -2008, -889, 1287,
+   702, 1576, 8289, 876, -187, -901, -602, 6363,
+   141, -1538, 1008, -1399, 2652, 2342, -792, -229,
+   4015, -339, 2396, 2358, -5957, -3011, -9989, -300,
+   -1311, 771, -346, -6502, 747, 1681, -15794, 6796,
+   -1067, 3718, -2932, -3243, -2861, -1526, 3501, 2016,
+   3428, 1293, 26, -3254, -868, -820, 2181, -1091,
+   -489, -1773, 1598, -2704, 2712, 99, 1321, 72,
+   -2340, 5255, -6217, 2964, 3356, -1230, -3548, -2045,
+   -1352, 795, 3486, -5695, -2230, -1462, -2318, -3059,
+   -2158, 6277, 491, -543, 5419, -4878, -2874, -2366,
+   974, 1686, -1541, -1632, -2494, 2066, 2744, 1565,
+   -4715, -2288, 653, 78, -1683, 5352, -102, 1683,
+   4716, -6395, -3046, -629, 1665, 6384, -8447, 2067,
+   -1616, 6815, 2266, -1036, -5038, 2433, -1651, 1100,
+   -3259, 2064, 2361, -2265, 1324, 2891, -314, -2138,
+   -2988, 510, -2769, 2064, 1017, 393, 1768, 1454,
+   -8112, -5234, 5309, 1943, -5209, 7297, 3919, -6962,
+   -2801, 3106, 789, 6443, 1361, -1278, 1161, -4952,
+   457, -601, -5225, -1984, -1369, 1295, 191, 882,
+   -651, 2795, 1339, 1014, 726, -1006, 3483, 290,
+   -1399, -1251, -2881, -1338, 3136, -5323, 633, -5421,
+   -6290, 3967, 3783, 4605, -2662, -295, -3887, -457,
+   5213, 3721, 924, -1770, -2616, 3186, -3607, 1911,
+   130, -3046, -7271, 1173, 5783, 1843, 1085, 3245,
+   -1263, 78, -1060, -1691, -3620, -2132, -209, -580,
+   1209, -2759, -3882, -5831, -1829, -921, -5332, 1283,
+   -3190, 2349, 1728, -5752, -7430, -6203, 1696, -55,
+   2174, -2204, 318, 690, -2819, -4307, 1395, 6894,
+   1441, -1780, 3808, 569, 3798, 928, 1422, -339,
+   -1251, -1287, 2070, 2876, -961, 1005, 7303, 17,
+   -1773, 1397, 319, 3843, 1678, 6099, 6560, 3289,
+   1865, -638, 732, -2911, 3968, 361, 422, -1089,
+   -1486, 6998, -1845, 2680, 293, 4466, 249, 637,
+   -1471, -1170, -4907, -106, 4637, 542, -2278, 1263,
+   -3205, -3427, -12921, -3277, -1577, -3644, -3593, 2914,
+   3684, -482, -3260, -3842, -2185, 3918, -3654, -168,
+   -1301, -1121, -303, 1102, -6530, -163, 1887, 2298,
+   -33, -305, -407, -571, -904, 2380, -1370, -589,
+   636, 851, -22, 1512, -9024, -5379, -653, -4918,
+   -3000, -3675, 3973, -5136, 6238, -3456, -1061, -969,
+   -449, -1220, -3767, -2634, -3361, -757, -3308, 6517,
+   5625, -1183, -752, -3137, -401, 1344, 3681, -277,
+   2478, 2315, 788, 3012, -240, 1288, 1235, 1606,
+   847, 73, 1037, -491, 410, -3203, -1322, 2917,
+   2233, 5982, -4473, -6050, -4147, 122, -30, -44,
+   -71, -144, -560, 1808, -3543, 1175, 2110, -2488,
+   -1972, -1154, -1688, 2224, -1458, 2123, -937, 2071,
+   3042, -181, -3693, 1762, -4058, 389, 3015, 3460,
+   -371, -4471, -801, 6941, -1142, 914, -1497, -5451,
+   1427, 363, -2305, 717, -101, -2243, 787, 2063,
+   2094, 1753, -4824, -392, 642, -1595, 2284, -355,
+   723, 704, 4422, 238, -1603, 4658, -261, -1049,
+   -5058, 1302, 8334, 300, 184, 2387, -4650, 920,
+   -1044, 4126, 2278, -1618, -1595, -3917, 3040, -1588,
+   2545, -554, 4401, 1209, -1611, -4681, 1402, 157,
+   -2734, 1322, 2633, -89, -2124, -3775, -1074, 2343,
+   653, -2387, -1463, 1026, 1146, 2433, -992, -89,
+   390, -604, -4066, -3364, 2779, 1317, -3104, -2945,
+   4261, 8309, 3272, 3126, 897, 1713, -135, 194,
+   -2696, 1554, -1179, -1107, -625, 233, -2899, 1175,
+   729, 4034, 1992, -1057, -724, 1125, -3964, -1280
+};
+
+static const opus_int16 layer1_recur_weights[432] = {
+   -438, -838, -6192, 5411, -418, 2893, 284, 1692,
+   724, -6694, 372, 2294, -2420, -986, -181, 3070,
+   -3303, 1708, 2409, 4537, 1035, -2341, 1559, 3677,
+   6927, 19, 7018, -1246, -6, 764, 1216, 3250,
+   -1130, -4239, 4176, -1841, -364, -11096, 1627, -5613,
+   -5810, -2252, -3298, -4786, -1273, 1114, 4722, 4239,
+   -1604, -848, 534, -472, -3669, -2118, -2768, -1475,
+   731, 3618, 1301, 262, -1884, 3715, 2816, -397,
+   -2884, -2069, -382, -778, -3494, -5716, 4715, 3827,
+   -5099, 259, -9518, -3708, -768, 600, 6425, -3923,
+   820, 4019, 2664, 5603, -4372, 1172, -1589, 1831,
+   -874, -2241, 2583, 1217, -5199, -552, 2599, 5865,
+   4130, 2308, 6881, -3955, 3300, -438, 2953, 2086,
+   -36, -5881, 4261, -737, -1528, -2968, 357, -808,
+   -4266, -5794, -2556, 4370, -3368, -6190, -7920, -3524,
+   -3430, 2304, -394, 3321, 3607, -885, -4667, -4856,
+   -7151, 1654, -1356, -2450, -3054, -2729, -6057, 3589,
+   2660, 5931, 1632, -1200, -2062, 5428, -5080, -1625,
+   4027, 258, -871, 2653, 6457, -3976, -1827, 3303,
+   215, -9023, -6973, 688, 1128, -324, 13, 2964,
+   1124, 2324, 1648, 1985, -2165, -859, -4202, 2908,
+   -2207, 2688, 314, 5358, 5148, 2579, -73, 248,
+   -1238, 2539, 520, -1776, 3805, 300, -3066, 1107,
+   -2935, 850, 1637, 3337, -406, -8662, -11909, -1224,
+   5174, 2046, 955, -3673, -140, -1652, -1644, 2844,
+   2741, 525, -4580, -2051, 2389, 167, -3123, -4217,
+   -3441, 4071, 1916, 6908, -1404, -938, -1956, -3821,
+   -3583, -1661, -9650, -4695, -2647, 3529, -1050, -1390,
+   -941, -8952, -8547, -5131, 1574, 3018, -1347, -3441,
+   2818, 1877, 922, 203, 1547, -2540, -2669, -3568,
+   3712, -1858, 1608, 4022, 1949, 1270, 5690, 4952,
+   -2924, -1852, -960, -6592, 4112, -4835, -6366, 947,
+   1653, 3866, -3543, 424, -1011, -4746, 482, -5315,
+   -1291, -2193, 1034, -2216, -1676, 2701, 854, 2519,
+   1207, -4291, -2353, -717, 3103, -546, 1223, -4721,
+   -235, -719, 2882, 2164, 866, -1741, -1255, -2969,
+   4765, -2875, -4220, -3430, -4870, -4859, -2382, -3808,
+   -1145, 1523, -6688, 1423, 331, 824, -3213, 2206,
+   1176, -6635, 1452, -3581, -4968, 3371, 6670, 478,
+   -896, -1936, -3446, 3845, -2542, -906, -3529, -4821,
+   6980, 4467, -2353, 3978, 886, -1195, -3932, 3882,
+   2825, -2174, -3966, 8341, 4275, 8445, -3631, -2451,
+   4168, -122, -1558, -1961, 1739, -2608, -1198, -1021,
+   -3015, 2149, -3997, -1421, -5459, -33, -4203, 3328,
+   12, 3219, 3345, 1329, 3197, 4859, -2998, 1177,
+   -2311, 4629, -5004, 513, 4744, 5323, 8186, -269,
+   5114, -8890, -1964, 7982, -399, -1038, -1705, 777,
+   -326, -1578, 3215, 2023, -1201, 1188, -1852, 3234,
+   1091, 1777, 3782, -1820, -2942, -954, -910, -1606,
+   2469, -3312, 3235, 2541, -2422, -2059, 707, -1015,
+   -7480, -2569, -4303, -6153, -3864, 8265, 1891, 2087,
+   -1127, 1155, -2118, -3621, -3438, 1199, 1071, -1461,
+   -2744, 2638, 3131, 518, -434, 7176, -2115, -527,
+   -1903, -1662, -2805, -5871, 2314, -2244, 2819, 7768
+};
+
+static const opus_int16 layer1_bias[36] = {
+   3484, 1686, 8617, 3821, 2768, 4548, 5706, 5368,
+   1998, 8007, 4605, 8417, 3054, 1436, 4327, 2667,
+   913, 4302, 1496, 1808, 883, 922, -415, 4419,
+   1156, -2037, 1373, -1083, 323, 1726, -668, -59,
+   -866, -3, -662, -2456
+};
+
+static const opus_int16 layer2_weights[24] = {
+   10570, 495, -6157, -20216, 8597, -3977, -23140, 5295,
+   -2893, 18700, 997, 8626, 2902, 434, -1866, 9536,
+   -830, -15077, -11656, 3090, 18331, 4166, -4320, -9123
+};
 
-static const float weights[450] = {
+static const opus_int16 layer2_bias[2] = {
+   -1526, 7868
+};
 
-/* hidden layer */
--0.514624f, 0.0234227f, -0.14329f, -0.0878216f, -0.00187827f,
--0.0257443f, 0.108524f, 0.00333881f, 0.00585017f, -0.0246132f,
-0.142723f, -0.00436494f, 0.0101354f, -0.11124f, -0.0809367f,
--0.0750772f, 0.0295524f, 0.00823944f, 0.150392f, 0.0320876f,
--0.0710564f, -1.43818f, 0.652076f, 0.0650744f, -1.54821f,
-0.168949f, -1.92724f, 0.0517976f, -0.0670737f, -0.0690121f,
-0.00247528f, -0.0522024f, 0.0631368f, 0.0532776f, 0.047751f,
--0.011715f, 0.142374f, -0.0290885f, -0.279263f, -0.433499f,
--0.0795174f, -0.380458f, -0.051263f, 0.218537f, -0.322478f,
-1.06667f, -0.104607f, -4.70108f, 0.312037f, 0.277397f,
--2.71859f, 1.70037f, -0.141845f, 0.0115618f, 0.0629883f,
-0.0403871f, 0.0139428f, -0.00430733f, -0.0429038f, -0.0590318f,
--0.0501526f, -0.0284802f, -0.0415686f, -0.0438999f, 0.0822666f,
-0.197194f, 0.0363275f, -0.0584307f, 0.0752364f, -0.0799796f,
--0.146275f, 0.161661f, -0.184585f, 0.145568f, 0.442823f,
-1.61221f, 1.11162f, 2.62177f, -2.482f, -0.112599f,
--0.110366f, -0.140794f, -0.181694f, 0.0648674f, 0.0842248f,
-0.0933993f, 0.150122f, 0.129171f, 0.176848f, 0.141758f,
--0.271822f, 0.235113f, 0.0668579f, -0.433957f, 0.113633f,
--0.169348f, -1.40091f, 0.62861f, -0.134236f, 0.402173f,
-1.86373f, 1.53998f, -4.32084f, 0.735343f, 0.800214f,
--0.00968415f, 0.0425904f, 0.0196811f, -0.018426f, -0.000343953f,
--0.00416389f, 0.00111558f, 0.0173069f, -0.00998596f, -0.025898f,
-0.00123764f, -0.00520373f, -0.0565033f, 0.0637394f, 0.0051213f,
-0.0221361f, 0.00819962f, -0.0467061f, -0.0548258f, -0.00314063f,
--1.18332f, 1.88091f, -0.41148f, -2.95727f, -0.521449f,
--0.271641f, 0.124946f, -0.0532936f, 0.101515f, 0.000208564f,
--0.0488748f, 0.0642388f, -0.0383848f, 0.0135046f, -0.0413592f,
--0.0326402f, -0.0137421f, -0.0225219f, -0.0917294f, -0.277759f,
--0.185418f, 0.0471128f, -0.125879f, 0.262467f, -0.212794f,
--0.112931f, -1.99885f, -0.404787f, 0.224402f, 0.637962f,
--0.27808f, -0.0723953f, -0.0537655f, -0.0336359f, -0.0906601f,
--0.0641309f, -0.0713542f, 0.0524317f, 0.00608819f, 0.0754101f,
--0.0488401f, -0.00671865f, 0.0418239f, 0.0536284f, -0.132639f,
-0.0267648f, -0.248432f, -0.0104153f, 0.035544f, -0.212753f,
--0.302895f, -0.0357854f, 0.376838f, 0.597025f, -0.664647f,
-0.268422f, -0.376772f, -1.05472f, 0.0144178f, 0.179122f,
-0.0360155f, 0.220262f, -0.0056381f, 0.0317197f, 0.0621066f,
--0.00779298f, 0.00789378f, 0.00350605f, 0.0104809f, 0.0362871f,
--0.157708f, -0.0659779f, -0.0926278f, 0.00770791f, 0.0631621f,
-0.0817343f, -0.424295f, -0.0437727f, -0.24251f, 0.711217f,
--0.736455f, -2.194f, -0.107612f, -0.175156f, -0.0366573f,
--0.0123156f, -0.0628516f, -0.0218977f, -0.00693699f, 0.00695185f,
-0.00507362f, 0.00359334f, 0.0052661f, 0.035561f, 0.0382701f,
-0.0342179f, -0.00790271f, -0.0170925f, 0.047029f, 0.0197362f,
--0.0153435f, 0.0644152f, -0.36862f, -0.0674876f, -2.82672f,
-1.34122f, -0.0788029f, -3.47792f, 0.507246f, -0.816378f,
--0.0142383f, -0.127349f, -0.106926f, -0.0359524f, 0.105045f,
-0.291554f, 0.195413f, 0.0866214f, -0.066577f, -0.102188f,
-0.0979466f, -0.12982f, 0.400181f, -0.409336f, -0.0593326f,
--0.0656203f, -0.204474f, 0.179802f, 0.000509084f, 0.0995954f,
--2.377f, -0.686359f, 0.934861f, 1.10261f, 1.3901f,
--4.33616f, -0.00264017f, 0.00713045f, 0.106264f, 0.143726f,
--0.0685305f, -0.054656f, -0.0176725f, -0.0772669f, -0.0264526f,
--0.0103824f, -0.0269872f, -0.00687f, 0.225804f, 0.407751f,
--0.0612611f, -0.0576863f, -0.180131f, -0.222772f, -0.461742f,
-0.335236f, 1.03399f, 4.24112f, -0.345796f, -0.594549f,
--76.1407f, -0.265276f, 0.0507719f, 0.0643044f, 0.0384832f,
-0.0424459f, -0.0387817f, -0.0235996f, -0.0740556f, -0.0270029f,
-0.00882177f, -0.0552371f, -0.00485851f, 0.314295f, 0.360431f,
--0.0787085f, 0.110355f, -0.415958f, -0.385088f, -0.272224f,
--1.55108f, -0.141848f, 0.448877f, -0.563447f, -2.31403f,
--0.120077f, -1.49918f, -0.817726f, -0.0495854f, -0.0230782f,
--0.0224014f, 0.117076f, 0.0393216f, 0.051997f, 0.0330763f,
--0.110796f, 0.0211117f, -0.0197258f, 0.0187461f, 0.0125183f,
-0.14876f, 0.0920565f, -0.342475f, 0.135272f, -0.168155f,
--0.033423f, -0.0604611f, -0.128835f, 0.664947f, -0.144997f,
-2.27649f, 1.28663f, 0.841217f, -2.42807f, 0.0230471f,
-0.226709f, -0.0374803f, 0.155436f, 0.0400342f, -0.184686f,
-0.128488f, -0.0939518f, -0.0578559f, 0.0265967f, -0.0999322f,
--0.0322768f, -0.322994f, -0.189371f, -0.738069f, -0.0754914f,
-0.214717f, -0.093728f, -0.695741f, 0.0899298f, -2.06188f,
--0.273719f, -0.896977f, 0.130553f, 0.134638f, 1.29355f,
-0.00520749f, -0.0324224f, 0.00530451f, 0.0192385f, 0.00328708f,
-0.0250838f, 0.0053365f, -0.0177321f, 0.00618789f, 0.00525364f,
-0.00104596f, -0.0360459f, 0.0402403f, -0.0406351f, 0.0136883f,
-0.0880722f, -0.0197449f, 0.089938f, 0.0100456f, -0.0475638f,
--0.73267f, 0.037433f, -0.146551f, -0.230221f, -3.06489f,
--1.40194f, 0.0198483f, 0.0397953f, -0.0190239f, 0.0470715f,
--0.131363f, -0.191721f, -0.0176224f, -0.0480352f, -0.221799f,
--0.26794f, -0.0292615f, 0.0612127f, -0.129877f, 0.00628332f,
--0.085918f, 0.0175379f, 0.0541011f, -0.0810874f, -0.380809f,
--0.222056f, -0.508859f, -0.473369f, 0.484958f, -2.28411f,
-0.0139516f,
-/* output layer */
-3.90017f, 1.71789f, -1.43372f, -2.70839f, 1.77107f,
-5.48006f, 1.44661f, 2.01134f, -1.88383f, -3.64958f,
--1.26351f, 0.779421f, 2.11357f, 3.10409f, 1.68846f,
--4.46197f, -1.61455f, 3.59832f, 2.43531f, -1.26458f,
-0.417941f, 1.47437f, 2.16635f, -1.909f, -0.828869f,
-1.38805f, -2.67975f, -0.110044f, 1.95596f, 0.697931f,
--0.313226f, -0.889315f, 0.283236f, 0.946102f, };
+const DenseLayer layer0 = {
+   layer0_bias,
+   layer0_weights,
+   25, 16, 0
+};
 
-static const int topo[3] = {25, 16, 2};
+const GRULayer layer1 = {
+   layer1_bias,
+   layer1_weights,
+   layer1_recur_weights,
+   16, 12
+};
 
-const MLP net = {
-    3,
-    topo,
-    weights
+const DenseLayer layer2 = {
+   layer2_bias,
+   layer2_weights,
+   12, 2, 1
 };
+
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index 3770fc64..0494170f 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -1189,7 +1189,16 @@ opus_int32 opus_encode_native(OpusEncoder *st, const opus_val16 *pcm, int frame_
     {
        int analysis_bandwidth;
        if (st->signal_type == OPUS_AUTO)
-          st->voice_ratio = (int)floor(.5+100*(1-analysis_info.music_prob));
+       {
+          float prob;
+          if (st->prev_mode == 0)
+             prob = analysis_info.music_prob;
+          else if (st->prev_mode == MODE_CELT_ONLY)
+             prob = analysis_info.music_prob_max;
+          else
+             prob = analysis_info.music_prob_min;
+          st->voice_ratio = (int)floor(.5+100*(1-prob));
+       }
 
        analysis_bandwidth = analysis_info.bandwidth;
        if (analysis_bandwidth<=12)
author	Jean-Marc Valin <jmvalin@jmvalin.ca>	2017-07-12 16:55:28 -0400
committer	Jean-Marc Valin <jmvalin@jmvalin.ca>	2017-07-17 14:02:59 -0400
commit	bcd006b57f54a183bc91e0d0d37ea3d968a6be33 (patch)
tree	56d030f18ebb9850f55f2a913e8bd6db000b0178
parent	2a4f49448f66f664f90edd220b8467d9b06938ab (diff)
download	opus-exp_rnn3.tar.gz