21 files changed, 2071 insertions, 235 deletions
diff --git a/Makefile.am b/Makefile.am
index db37d998..07876626 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,6 +14,7 @@ if FIXED_POINT
 SILK_SOURCES += $(SILK_SOURCES_FIXED)
 else
 SILK_SOURCES += $(SILK_SOURCES_FLOAT)
+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
 endif
 
 include celt_headers.mk
diff --git a/celt/bands.c b/celt/bands.c
index f38b6626..531d3118 100644
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -41,6 +41,21 @@
 #include "mathops.h"
 #include "rate.h"
 
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{
+   /* Count thresholds up to the first one exceeding val (N if none does), then apply hysteresis around prev */
+   int level=0;
+   while (level<N && !(val < thresholds[level]))
+      level++;
+   /* Moving up: keep prev unless val clears the boundary above prev by its margin */
+   if (level>prev && val < thresholds[prev]+hysteresis[prev])
+      return prev;
+   /* Moving down: keep prev unless val drops below the boundary under prev by its margin */
+   if (level<prev && val > thresholds[prev-1]-hysteresis[prev-1])
+      return prev;
+   return level;
+}
+
 opus_uint32 celt_lcg_rand(opus_uint32 seed)
 {
    return 1664525 * seed + 1013904223;
diff --git a/celt/bands.h b/celt/bands.h
index 9ff8ffd7..47d15b6d 100644
--- a/celt/bands.h
+++ b/celt/bands.h
@@ -92,4 +92,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
 
 opus_uint32 celt_lcg_rand(opus_uint32 seed);
 
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
 #endif /* BANDS_H */
diff --git a/celt/celt.c b/celt/celt.c
index adb9737c..cd783f12 100644
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -178,6 +178,7 @@ struct OpusCustomEncoder {
    int prefilter_tapset_old;
 #endif
    int consec_transient;
+   AnalysisInfo analysis;
 
    opus_val32 preemph_memE[2];
    opus_val32 preemph_memD[2];
@@ -187,6 +188,9 @@ struct OpusCustomEncoder {
    opus_int32 vbr_drift;
    opus_int32 vbr_offset;
    opus_int32 vbr_count;
+   opus_val16 overlap_max;
+   opus_val16 stereo_saving;
+   int intensity;
 
 #ifdef RESYNTH
    celt_sig syn_mem[2][2*MAX_PERIOD];
@@ -294,92 +298,128 @@ static inline opus_val16 SIG2WORD16(celt_sig x)
 }
 
 static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
-                              int overlap)
+                              int overlap, opus_val16 *tf_estimate, int *tf_chan, AnalysisInfo *analysis)
 {
    int i;
    VARDECL(opus_val16, tmp);
-   opus_val32 mem0=0,mem1=0;
+   opus_val32 mem0,mem1;
    int is_transient = 0;
    int block;
-   int N;
+   int c, N;
+   opus_val16 maxbin;
+   int tf_max;
    VARDECL(opus_val16, bins);
+   opus_val16 T1, T2, T3, T4, T5;
+   opus_val16 follower;
+   int metric=0;
+   int fmetric=0, bmetric=0;
+   int count1, count2, count3, count4, count5;
+   /* Score each channel by how many peak bins sit far below a decaying peak follower; a high score flags a transient */
    SAVE_STACK;
    ALLOC(tmp, len, opus_val16);
 
-   block = overlap/2;
-   N=len/block;
+   block = overlap/4;
+   N=len/block-1;
    ALLOC(bins, N, opus_val16);
-   if (C==1)
+   /* Bins span 2*block samples and advance by block, hence len/block-1 bins; each channel is analysed separately */
+   tf_max = 0;
+   for (c=0;c<C;c++)
    {
+      mem0=0;
+      mem1=0;
       for (i=0;i<len;i++)
-         tmp[i] = SHR32(in[i],SIG_SHIFT);
-   } else {
-      for (i=0;i<len;i++)
-         tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1);
-   }
+         tmp[i] = SHR32(in[i+c*len],SIG_SHIFT);
 
-   /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
-   for (i=0;i<len;i++)
-   {
-      opus_val32 x,y;
-      x = tmp[i];
-      y = ADD32(mem0, x);
+      /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x,y;
+         x = tmp[i];
+         y = ADD32(mem0, x);
 #ifdef FIXED_POINT
-      mem0 = mem1 + y - SHL32(x,1);
-      mem1 = x - SHR32(y,1);
+         mem0 = mem1 + y - SHL32(x,1);
+         mem1 = x - SHR32(y,1);
 #else
-      mem0 = mem1 + y - 2*x;
-      mem1 = x - .5f*y;
+         mem0 = mem1 + y - 2*x;
+         mem1 = x - .5f*y;
 #endif
-      tmp[i] = EXTRACT16(SHR32(y,2));
-   }
-   /* First few samples are bad because we don't propagate the memory */
-   for (i=0;i<12;i++)
-      tmp[i] = 0;
+         tmp[i] = EXTRACT16(SHR32(y,2));
+      }
+      /* First few samples are bad because we don't propagate the memory */
+      for (i=0;i<12;i++)
+         tmp[i] = 0;
 
-   for (i=0;i<N;i++)
-   {
-      int j;
-      opus_val16 max_abs=0;
-      for (j=0;j<block;j++)
-         max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
-      bins[i] = max_abs;
-   }
-   for (i=0;i<N;i++)
-   {
-      int j;
-      int conseq=0;
-      opus_val16 t1, t2, t3;
-
-      t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]);
-      t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
-      for (j=0;j<i;j++)
-      {
-         if (bins[j] < t1)
-            conseq++;
-         if (bins[j] < t2)
-            conseq++;
-         else
-            conseq = 0;
+      maxbin=0;
+      for (i=0;i<N;i++)
+      {
+         int j;
+         opus_val16 max_abs=0;
+         for (j=0;j<2*block;j++)
+            max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
+         bins[i] = max_abs;
+         maxbin = MAX16(maxbin, bins[i]);
       }
-      if (conseq>=3)
+      /* Fractions of the follower below which a bin is counted (T1 strictest, T5 loosest) */
+      T1 = QCONST16(.09f, 15);
+      T2 = QCONST16(.12f, 15);
+      T3 = QCONST16(.18f, 15);
+      T4 = QCONST16(.28f, 15);
+      T5 = QCONST16(.4f, 15);
+      /* Forward pass: follower is the running bin peak, decayed by 0.97 per bin */
+      follower = 0;
+      count1=count2=count3=count4=count5=0;
+      for (i=0;i<N;i++)
+      {
+         follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
+         if (bins[i] < MULT16_16_Q15(T1, follower))
+            count1++;
+         if (bins[i] < MULT16_16_Q15(T2, follower))
+            count2++;
+         if (bins[i] < MULT16_16_Q15(T3, follower))
+            count3++;
+         if (bins[i] < MULT16_16_Q15(T4, follower))
+            count4++;
+         if (bins[i] < MULT16_16_Q15(T5, follower))
+            count5++;
+      }
+      fmetric = (5*count1 + 4*count2 + 3*count3 + 2*count4 + count5)/2; /* forward counts get half the weight */
+      follower=0; /* Backward pass: same counting, from the last bin to the first */
+      count1=count2=count3=count4=count5=0;
+      for (i=N-1;i>=0;i--)
+      {
+         follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
+         if (bins[i] < MULT16_16_Q15(T1, follower))
+            count1++;
+         if (bins[i] < MULT16_16_Q15(T2, follower))
+            count2++;
+         if (bins[i] < MULT16_16_Q15(T3, follower))
+            count3++;
+         if (bins[i] < MULT16_16_Q15(T4, follower))
+            count4++;
+         if (bins[i] < MULT16_16_Q15(T5, follower))
+            count5++;
+      }
+      bmetric = 5*count1 + 4*count2 + 3*count3 + 2*count4 + count5;
+      metric = fmetric+bmetric;
+
+      /* The decision threshold starts at 20 and grows with the larger of tonality and noisiness */
+      if (metric>20+50*MAX16(analysis->tonality, analysis->noisiness))
          is_transient=1;
-      conseq = 0;
-      for (j=i+1;j<N;j++)
+      /* Remember the strongest metric across channels and which channel produced it */
+      if (metric>tf_max)
       {
-         if (bins[j] < t3)
-            conseq++;
-         else
-            conseq = 0;
+         *tf_chan = c;
+         tf_max = metric;
       }
-      if (conseq>=7)
-         is_transient=1;
    }
+   /* *tf_estimate = 1 + sqrt(MAX(0, .0069*MIN(163,tf_max) - .139)), with 1.0 represented as QCONST16(1.f, 14) */
+   *tf_estimate = QCONST16(1.f, 14) + celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),IMIN(163,tf_max)),14)-QCONST32(0.139,28)));
+
    RESTORE_STACK;
 #ifdef FUZZING
    is_transient = rand()&0x1;
 #endif
+   /*printf("%d %f %f %f %f\n", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
    return is_transient;
 }
 
@@ -536,34 +576,22 @@ static const signed char tf_select_table[4][8] = {
       {0, -2, 0, -3,    3, 0, 1,-1},
 };
 
-static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
 {
-   int i, j;
-   static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)};
+   int i;
    opus_val32 L1;
-   opus_val16 bias;
-   L1=0;
-   for (i=0;i<1<<LM;i++)
-   {
-      opus_val32 L2 = 0;
-      for (j=0;j<N>>LM;j++)
-         L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]);
-      L1 += celt_sqrt(L2);
-   }
-   L1 = MULT16_32_Q15(sqrtM_1[LM], L1);
-   if (width==1)
-      bias = QCONST16(.12f,15)*LM;
-   else if (width==2)
-      bias = QCONST16(.05f,15)*LM;
-   else
-      bias = QCONST16(.02f,15)*LM;
-   L1 = MAC16_32_Q15(L1, bias, L1);
+   L1 = 0; /* accumulates the sum of |tmp[i]| over the N coefficients */
+   for (i=0;i<N;i++)
+      L1 += EXTEND32(ABS16(tmp[i]));
+   /* When in doubt, prefer good freq resolution. NOTE(review): assumes MAC16_32_Q15(c,a,b) is c+a*b, so the penalty scales with LM*bias - confirm */
+   L1 = MAC16_32_Q15(L1, LM*bias, L1);
    return L1;
+
 }
 
 static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
       int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM,
-      int *tf_sum)
+      int *tf_sum, opus_val16 tf_estimate, int tf_chan)
 {
    int i;
    VARDECL(int, metric);
@@ -572,9 +600,16 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
    VARDECL(int, path0);
    VARDECL(int, path1);
    VARDECL(celt_norm, tmp);
+   VARDECL(celt_norm, tmp_1);
    int lambda;
+   int sel;
+   int selcost[2];
    int tf_select=0;
+   opus_val16 bias;
+
    SAVE_STACK;
+   bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(1.5f,14)-tf_estimate));
+   /*printf("%f ", bias);*/
 
    if (nbCompressedBytes<15*C)
    {
@@ -591,9 +626,10 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
       lambda = 4;
    else
       lambda = 3;
-
+   lambda*=2;
    ALLOC(metric, len, int);
    ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+   ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
    ALLOC(path0, len, int);
    ALLOC(path1, len, int);
 
@@ -601,19 +637,35 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
    for (i=0;i<len;i++)
    {
       int j, k, N;
+      int narrow;
       opus_val32 L1, best_L1;
       int best_level=0;
       N = (m->eBands[i+1]-m->eBands[i])<<LM;
+      /* band is too narrow to be split down to LM=-1 */
+      narrow = (m->eBands[i+1]-m->eBands[i])==1;
       for (j=0;j<N;j++)
-         tmp[j] = X[j+(m->eBands[i]<<LM)];
+         tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
       /* Just add the right channel if we're in stereo */
-      if (C==2)
+      /*if (C==2)
          for (j=0;j<N;j++)
-            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));
-      L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM);
+            tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+      L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
       best_L1 = L1;
+      /* Check the -1 case for transients */
+      if (isTransient && !narrow)
+      {
+         for (j=0;j<N;j++)
+            tmp_1[j] = tmp[j];
+         haar1(tmp_1, N>>LM, 1<<LM);
+         L1 = l1_metric(tmp_1, N, LM+1, bias);
+         if (L1<best_L1)
+         {
+            best_L1 = L1;
+            best_level = -1;
+         }
+      }
       /*printf ("%f ", L1);*/
-      for (k=0;k<LM;k++)
+      for (k=0;k<LM+!(isTransient||narrow);k++)
       {
          int B;
 
@@ -622,12 +674,9 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
          else
             B = k+1;
 
-         if (isTransient)
-            haar1(tmp, N>>(LM-k), 1<<(LM-k));
-         else
-            haar1(tmp, N>>k, 1<<k);
+         haar1(tmp, N>>k, 1<<k);
 
-         L1 = l1_metric(tmp, N, B, N>>LM);
+         L1 = l1_metric(tmp, N, B, bias);
 
          if (L1 < best_L1)
          {
@@ -636,17 +685,40 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
          }
       }
       /*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+      /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
       if (isTransient)
-         metric[i] = best_level;
+         metric[i] = 2*best_level;
       else
-         metric[i] = -best_level;
-      *tf_sum += metric[i];
+         metric[i] = -2*best_level;
+      *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
+      /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+         biasing the decision */
+      if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+         metric[i]-=1;
+      /*printf("%d ", metric[i]);*/
    }
    /*printf("\n");*/
-   /* NOTE: Future optimized implementations could detect extreme transients and set
-      tf_select = 1 but so far we have not found a reliable way of making this useful */
+   /* Search for the optimal tf resolution, including tf_select */
    tf_select = 0;
-
+   for (sel=0;sel<2;sel++)
+   {
+      cost0 = 0;
+      cost1 = isTransient ? 0 : lambda;
+      for (i=1;i<len;i++)
+      {
+         int curr0, curr1;
+         curr0 = IMIN(cost0, cost1 + lambda);
+         curr1 = IMIN(cost0 + lambda, cost1);
+         cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+         cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+      }
+      cost0 = IMIN(cost0, cost1);
+      selcost[sel]=cost0;
+   }
+   /* For now, we're conservative and only allow tf_select=1 for transients.
+    * If tests confirm it's useful for non-transients, we could allow it. */
+   if (selcost[1]<selcost[0] && isTransient)
+      tf_select=1;
    cost0 = 0;
    cost1 = isTransient ? 0 : lambda;
    /* Viterbi forward pass */
@@ -676,8 +748,8 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
          curr1 = from1;
          path1[i]= 1;
       }
-      cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]);
-      cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]);
+      cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+      cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
    }
    tf_res[len-1] = cost0 < cost1 ? 0 : 1;
    /* Viterbi backward pass to check the decisions */
@@ -688,6 +760,7 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
       else
          tf_res[i] = path0[i+1];
    }
+   /*printf("%d %f\n", *tf_sum, tf_estimate);*/
    RESTORE_STACK;
 #ifdef FUZZING
    tf_select = rand()&0x1;
@@ -735,7 +808,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
       tf_select = 0;
    for (i=start;i<end;i++)
       tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
-   /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
+   /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
 }
 
 static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@@ -789,15 +862,20 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
 }
 
 static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
-      const opus_val16 *bandLogE, int end, int LM, int C, int N0)
+      const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+      AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+      int intensity)
 {
    int i;
    opus_val32 diff=0;
    int c;
    int trim_index = 5;
+   opus_val16 trim = QCONST16(5.f, 8);
+   opus_val16 logXC, logXC2;
    if (C==2)
    {
       opus_val16 sum = 0; /* Q10 */
+      opus_val16 minXC; /* Q10 */
       /* Compute inter-channel correlation for low frequencies */
       for (i=0;i<8;i++)
       {
@@ -808,6 +886,15 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
          sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
       }
       sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+      minXC = sum;
+      for (i=8;i<intensity;i++)
+      {
+         int j;
+         opus_val32 partial = 0;
+         for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+            partial = MAC16_16(partial, X[j], X[N0+j]);
+         minXC = MIN16(minXC, EXTRACT16(SHR32(partial, 18)));
+      }
       /*printf ("%f\n", sum);*/
       if (sum > QCONST16(.995f,10))
          trim_index-=4;
@@ -817,18 +904,28 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
          trim_index-=2;
       else if (sum > QCONST16(.8f,10))
          trim_index-=1;
+      /* mid-side savings estimations based on the LF average*/
+      logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+      /* mid-side savings estimations based on min correlation */
+      logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef FIXED_POINT
+      /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+      logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+      logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+      trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+      *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
    }
 
    /* Estimate spectral tilt */
    c=0; do {
       for (i=0;i<end-1;i++)
       {
-         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands);
+         diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
       }
    } while (++c<C);
-   /* We divide by two here to avoid making the tilt larger for stereo as a
-      result of a bug in the loop above */
-   diff /= 2*C*(end-1);
+   diff /= C*(end-1);
    /*printf("%f\n", diff);*/
    if (diff > QCONST16(2.f, DB_SHIFT))
       trim_index--;
@@ -838,11 +935,25 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
       trim_index++;
    if (diff < -QCONST16(10.f, DB_SHIFT))
       trim_index++;
+   trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+   trim -= 2*SHR16(tf_estimate-QCONST16(1.f,14), 14-8);
+#ifndef FIXED_POINT
+   if (analysis->valid)
+   {
+      trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05)));
+   }
+#endif
 
+#ifdef FIXED_POINT
+   trim_index = PSHR32(trim, 8);
+#else
+   trim_index = floor(.5+trim);
+#endif
    if (trim_index<0)
       trim_index = 0;
    if (trim_index>10)
       trim_index = 10;
+   /*printf("%d\n", trim_index);*/
 #ifdef FUZZING
    trim_index = rand()%11;
 #endif
@@ -891,6 +1002,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    VARDECL(celt_norm, X);
    VARDECL(celt_ener, bandE);
    VARDECL(opus_val16, bandLogE);
+   VARDECL(opus_val16, bandLogE2);
    VARDECL(int, fine_quant);
    VARDECL(opus_val16, error);
    VARDECL(int, pulses);
@@ -914,7 +1026,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    int alloc_trim;
    int pitch_index=COMBFILTER_MINPERIOD;
    opus_val16 gain1 = 0;
-   int intensity=0;
    int dual_stereo=0;
    int effectiveBytes;
    opus_val16 pf_threshold;
@@ -929,8 +1040,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    int anti_collapse_rsv;
    int anti_collapse_on=0;
    int silence=0;
+   int tf_chan = 0;
+   opus_val16 tf_estimate;
+   int pitch_change=0;
+   opus_int32 tot_boost=0;
+   opus_val16 sample_max;
+   opus_val16 maxDepth;
    ALLOC_STACK;
 
+   tf_estimate = QCONST16(1.0f,14);
    if (nbCompressedBytes<2 || pcm==NULL)
      return OPUS_BAD_ARG;
 
@@ -1045,6 +1163,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    ALLOC(in, CC*(N+st->overlap), celt_sig);
 
+   sample_max=MAX16(st->overlap_max, celt_maxabs16(pcm, C*(N-st->mode->overlap)));
+   st->overlap_max=celt_maxabs16(pcm+C*(N-st->mode->overlap), C*st->mode->overlap);
+   sample_max=MAX16(sample_max, st->overlap_max);
    /* Find pitch period and gain */
    {
       VARDECL(celt_sig, _pre);
@@ -1084,13 +1205,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
             *inp = tmp + st->preemph_memE[c];
             st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
                                    - MULT16_32_Q15(st->mode->preemph[0], tmp);
-            silence = silence && *inp == 0;
             inp++;
          }
          OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
          OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
       } while (++c<CC);
 
+#ifdef FIXED_POINT
+      silence = (sample_max==0);
+#else
+      silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
 #ifdef FUZZING
       if ((rand()&0x3F)==0)
          silence = 1;
@@ -1120,8 +1245,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
 
          pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+         /* Don't search the last 1.5 octaves of the range because
+            there are too many false positives due to short-term correlation */
          pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
-               COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
+               COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
          pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
 
          gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@@ -1129,6 +1256,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          if (pitch_index > COMBFILTER_MAXPERIOD-2)
             pitch_index = COMBFILTER_MAXPERIOD-2;
          gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+         if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
+               && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+            pitch_change = 1;
+         /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
          if (st->loss_rate>2)
             gain1 = HALF32(gain1);
          if (st->loss_rate>4)
@@ -1227,7 +1358,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
       if (st->complexity > 1)
       {
          isTransient = transient_analysis(in, N+st->overlap, CC,
-                  st->overlap);
+                  st->overlap, &tf_estimate, &tf_chan, &st->analysis);
          if (isTransient)
             shortBlocks = M;
       }
@@ -1244,6 +1375,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    {
       for (i=0;i<N;i++)
          freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
+      tf_chan = 0;
    }
    if (st->upsample != 1)
    {
@@ -1256,17 +1388,53 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
             freq[c*N+i] = 0;
       } while (++c<C);
    }
-   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
-
    compute_band_energies(st->mode, freq, bandE, effEnd, C, M);
 
    amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C);
+   /*for (i=0;i<21;i++)
+      printf("%f ", bandLogE[i]);
+   printf("\n");*/
+
+   ALLOC(bandLogE2, C*st->mode->nbEBands, opus_val16);
+   if (shortBlocks && st->complexity>=8)
+   {
+      VARDECL(celt_sig, freq2);
+      VARDECL(opus_val32, bandE2);
+      ALLOC(freq2, CC*N, celt_sig);
+      compute_mdcts(st->mode, 0, in, freq2, CC, LM);
+      if (CC==2&&C==1)
+      {
+         for (i=0;i<N;i++)
+            freq2[i] = ADD32(HALF32(freq2[i]), HALF32(freq2[N+i]));
+      }
+      if (st->upsample != 1)
+      {
+         c=0; do
+         {
+            int bound = N/st->upsample;
+            for (i=0;i<bound;i++)
+               freq2[c*N+i] *= st->upsample;
+            for (;i<N;i++)
+               freq2[c*N+i] = 0;
+         } while (++c<C);
+      }
+      ALLOC(bandE2, C*st->mode->nbEBands, opus_val32);
+      compute_band_energies(st->mode, freq2, bandE2, effEnd, C, M);
+      amp2Log2(st->mode, effEnd, st->end, bandE2, bandLogE2, C);
+      for (i=0;i<C*st->mode->nbEBands;i++)
+         bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+   } else {
+      for (i=0;i<C*st->mode->nbEBands;i++)
+         bandLogE2[i] = bandLogE[i];
+   }
+
+   ALLOC(X, C*N, celt_norm);         /**< Interleaved normalised MDCTs */
 
    /* Band normalisation */
    normalise_bands(st->mode, freq, X, bandE, effEnd, C, M);
 
    ALLOC(tf_res, st->mode->nbEBands, int);
-   tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum);
+   tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum, tf_estimate, tf_chan);
    for (i=effEnd;i<st->end;i++)
       tf_res[i] = tf_res[effEnd-1];
 
@@ -1278,7 +1446,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
 
    tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
 
-   st->spread_decision = SPREAD_NORMAL;
    if (ec_tell(enc)+4<=total_bits)
    {
       if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
@@ -1286,9 +1453,21 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          if (st->complexity == 0)
             st->spread_decision = SPREAD_NONE;
       } else {
-         st->spread_decision = spreading_decision(st->mode, X,
-               &st->tonal_average, st->spread_decision, &st->hf_average,
-               &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+         if (st->analysis.valid)
+         {
+            static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+            static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+            static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+            static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+            st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+            st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+         } else {
+            st->spread_decision = spreading_decision(st->mode, X,
+                  &st->tonal_average, st->spread_decision, &st->hf_average,
+                  &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+         }
+         /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+         /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
       }
       ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
    }
@@ -1300,38 +1479,95 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    for (i=0;i<st->mode->nbEBands;i++)
       offsets[i] = 0;
    /* Dynamic allocation code */
+   maxDepth=-QCONST16(32.f, DB_SHIFT);
    /* Make sure that dynamic allocation can't make us bust the budget */
    if (effectiveBytes > 50 && LM>=1)
    {
-      int t1, t2;
-      if (LM <= 1)
+      int last=0;
+      VARDECL(opus_val16, follower);
+      ALLOC(follower, C*st->mode->nbEBands, opus_val16);
+      c=0;do
+      {
+         follower[c*st->mode->nbEBands] = bandLogE2[c*st->mode->nbEBands];
+         for (i=1;i<st->end;i++)
+         {
+            /* The last band to be at least 3 dB higher than the previous one
+               is the last we'll consider. Otherwise, we run into problems on
+               bandlimited signals. */
+            if (bandLogE2[c*st->mode->nbEBands+i] > bandLogE2[c*st->mode->nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+               last=i;
+            follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]);
+         }
+         for (i=last-1;i>=0;i--)
+            follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i], MIN16(follower[c*st->mode->nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]));
+         for (i=0;i<st->end;i++)
+         {
+            opus_val16 noise_floor;
+            /* Noise floor must take into account eMeans, the depth, the width of the bands
+               and the preemphasis filter (approx. square of bark band ID) */
+            noise_floor = MULT16_16(QCONST16(0.0625f, DB_SHIFT),st->mode->logN[i])
+                  +QCONST16(.5f,DB_SHIFT)+SHL16(9-st->lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+                  +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+            follower[c*st->mode->nbEBands+i] = MAX16(follower[c*st->mode->nbEBands+i], noise_floor);
+            maxDepth = MAX16(maxDepth, bandLogE[c*st->mode->nbEBands+i]-noise_floor);
+         }
+      } while (++c<C);
+      if (C==2)
       {
-         t1 = 3;
-         t2 = 5;
+         for (i=st->start;i<st->end;i++)
+         {
+            /* Consider 24 dB "cross-talk" */
+            follower[st->mode->nbEBands+i] = MAX16(follower[st->mode->nbEBands+i], follower[                   i]-QCONST16(4.f,DB_SHIFT));
+            follower[                   i] = MAX16(follower[                   i], follower[st->mode->nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+            follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[st->mode->nbEBands+i]-follower[st->mode->nbEBands+i]));
+         }
       } else {
-         t1 = 2;
-         t2 = 4;
+         for (i=st->start;i<st->end;i++)
+         {
+            follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+         }
       }
-      for (i=st->start+1;i<st->end-1;i++)
+      /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+      if ((!st->vbr || st->constrained_vbr)&&!isTransient)
       {
-         opus_val32 d2;
-         d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1];
-         if (C==2)
-            d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]-
-                  bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]);
-#ifdef FUZZING
-         if((rand()&0xF)==0)
+         for (i=st->start;i<st->end;i++)
+            follower[i] = HALF16(follower[i]);
+      }
+      for (i=st->start;i<st->end;i++)
+      {
+         int width;
+         int boost;
+         int boost_bits;
+
+         if (i<8)
+            follower[i] *= 2;
+         if (i>=12)
+            follower[i] = HALF16(follower[i]);
+         follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+         /* FIXME: Adaptively reduce follower at low rate or for cbr/cvbr */
+         width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+         if (width<6)
          {
-            offsets[i] += 1;
-            if((rand()&0x3)==0)
-               offsets[i] += 1+(rand()&0x3);
+            boost = SHR32(EXTEND32(follower[i]),DB_SHIFT);
+            boost_bits = boost*width<<BITRES;
+         } else if (width > 48) {
+            boost = SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+            boost_bits = (boost*width<<BITRES)/8;
+         } else {
+            boost = SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+            boost_bits = boost*6<<BITRES;
+         }
+         /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
+         if ((!st->vbr || (st->constrained_vbr&&!isTransient))
+               && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
+         {
+            offsets[i] = 0;
+            break;
+         } else {
+            offsets[i] = boost;
+            tot_boost += boost_bits;
          }
-#else
-         if (d2 > SHL16(t1,DB_SHIFT))
-            offsets[i] += 1;
-         if (d2 > SHL16(t2,DB_SHIFT))
-            offsets[i] += 1;
-#endif
       }
    }
    dynalloc_logp = 6;
@@ -1368,11 +1604,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
          dynalloc_logp = IMAX(2, dynalloc_logp-1);
       offsets[i] = boost;
    }
+
+   if (C==2)
+   {
+      int effectiveRate;
+
+      static const opus_val16 intensity_thresholds[21]=
+      /* 0  1  2  3  4  5  6  7  8  9 10 11 12 13 14 15 16 17 18 19  20  off*/
+        { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130};
+      static const opus_val16 intensity_histeresis[21]=
+        {  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6,  8, 12};
+
+      /* Always use MS for 2.5 ms frames until we can do a better analysis */
+      if (LM!=0)
+         dual_stereo = stereo_analysis(st->mode, X, LM, N);
+
+      /* Account for coarse energy */
+      effectiveRate = (8*effectiveBytes - 80)>>LM;
+
+      /* effectiveRate in kb/s */
+      effectiveRate = 2*effectiveRate/5;
+
+      st->intensity = hysteresis_decision(effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity);
+      st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+   }
+
    alloc_trim = 5;
    if (tell+(6<<BITRES) <= total_bits - total_boost)
    {
       alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
-            st->end, LM, C, N);
+            st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
       ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
       tell = ec_tell_frac(enc);
    }
@@ -1383,28 +1644,96 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      opus_val16 alpha;
      opus_int32 delta;
      /* The target rate in 8th bits per frame */
-     opus_int32 target;
+     opus_int32 target, base_target;
      opus_int32 min_allowed;
+     int coded_bins;
+     int coded_bands;
      int lm_diff = st->mode->maxLM - LM;
+     coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands;
+     coded_bins = st->mode->eBands[coded_bands]<<LM;
+     if (C==2)
+        coded_bins += st->mode->eBands[IMIN(st->intensity, coded_bands)]<<LM;
 
      /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
         The CELT allocator will just not be able to use more than that anyway. */
      nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
-     target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES);
+     target = vbr_rate - ((40*C+20)<<BITRES);
+     base_target = target;
+
+     if (st->constrained_vbr)
+        target += (st->vbr_offset>>lm_diff);
+
+     /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef FIXED_POINT
+     if (st->analysis.valid && st->analysis.activity<.4)
+        target -= (coded_bins<<BITRES)*1*(.4-st->analysis.activity);
+#endif
+     /* Stereo savings */
+     if (C==2)
+     {
+        int coded_stereo_bands;
+        int coded_stereo_dof;
+        coded_stereo_bands = IMIN(st->intensity, coded_bands);
+        coded_stereo_dof = (st->mode->eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+        /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+        target -= MIN32(target/3, SHR16(MULT16_16(st->stereo_saving,(coded_stereo_dof<<BITRES)),8));
+        target += MULT16_16_Q15(QCONST16(0.035,15),coded_stereo_dof<<BITRES);
+     }
+     /* Limits starving of other bands when using dynalloc */
+     target += tot_boost;
+     /* Compensates for the average transient boost */
+     target = MULT16_32_Q15(QCONST16(0.96f,15),target);
+     /* Apply transient boost */
+     target = SHL32(MULT16_32_Q15(tf_estimate, target),1);
+
+#ifndef FIXED_POINT
+     /* Apply tonality boost */
+     if (st->analysis.valid) {
+        int tonal_target;
+        float tonal;
+
+        /* Compensates for the average tonality boost */
+        target -= MULT16_16_Q15(QCONST16(0.13f,15),coded_bins<<BITRES);
+
+        tonal = MAX16(0,st->analysis.tonality-.2);
+        tonal_target = target + (coded_bins<<BITRES)*2.0f*tonal;
+        if (pitch_change)
+           tonal_target +=  (coded_bins<<BITRES)*.8;
+        /*printf("%f %f ", st->analysis.tonality, tonal);*/
+        target = IMAX(tonal_target,target);
+     }
+#endif
 
-     /* Shortblocks get a large boost in bitrate, but since they
-        are uncommon long blocks are not greatly affected */
-     if (shortBlocks || tf_sum < -2*(st->end-st->start))
-        target = 7*target/4;
-     else if (tf_sum < -(st->end-st->start))
-        target = 3*target/2;
-     else if (M > 1)
-        target-=(target+14)/28;
+     {
+        opus_int32 floor_depth;
+        int bins;
+        bins = st->mode->eBands[st->mode->nbEBands-2]<<LM;
+        /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+        floor_depth = SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+        floor_depth = IMAX(floor_depth, target>>2);
+        target = IMIN(target, floor_depth);
+        /*printf("%f %d\n", maxDepth, floor_depth);*/
+     }
+
+     if (st->constrained_vbr || st->bitrate<64000)
+     {
+        opus_val16 rate_factor;
+#ifdef FIXED_POINT
+        rate_factor = (opus_val16)IMAX(0,IMIN(32767,st->bitrate-32000)); /* Q15: saturate rather than wrap above ~64.7 kb/s */
+#else
+        rate_factor = MAX16(0,(1.f/32768)*(st->bitrate-32000));
+#endif
+        if (st->constrained_vbr)
+           rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
+        target = base_target + MULT16_32_Q15(rate_factor, target-base_target);
+
+     }
+     /* Don't allow more than doubling the rate */
+     target = IMIN(2*base_target, target);
 
      /* The current offset is removed from the target and the space used
         so far is added*/
      target=target+tell;
-
      /* In VBR mode the frame size must not be reduced so much that it would
          result in the encoder running out of bits.
         The margin of 2 bytes ensures that none of the bust-prevention logic
@@ -1444,8 +1773,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
      /*printf ("%d\n", st->vbr_reservoir);*/
 
      /* Compute the offset we need to apply in order to reach the target */
-     st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
-     st->vbr_offset = -st->vbr_drift;
+     if (st->constrained_vbr)
+     {
+        st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+        st->vbr_offset = -st->vbr_drift;
+     }
      /*printf ("%d\n", st->vbr_drift);*/
 
      if (st->constrained_vbr && st->vbr_reservoir < 0)
@@ -1458,38 +1790,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
         /*printf ("+%d\n", adjust);*/
      }
      nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
+     /*printf("%d\n", nbCompressedBytes*50*8);*/
      /* This moves the raw bits to take into account the new compressed size */
      ec_enc_shrink(enc, nbCompressedBytes);
    }
-   if (C==2)
-   {
-      int effectiveRate;
-
-      /* Always use MS for 2.5 ms frames until we can do a better analysis */
-      if (LM!=0)
-         dual_stereo = stereo_analysis(st->mode, X, LM, N);
-
-      /* Account for coarse energy */
-      effectiveRate = (8*effectiveBytes - 80)>>LM;
-
-      /* effectiveRate in kb/s */
-      effectiveRate = 2*effectiveRate/5;
-      if (effectiveRate<35)
-         intensity = 8;
-      else if (effectiveRate<50)
-         intensity = 12;
-      else if (effectiveRate<68)
-         intensity = 16;
-      else if (effectiveRate<84)
-         intensity = 18;
-      else if (effectiveRate<102)
-         intensity = 19;
-      else if (effectiveRate<130)
-         intensity = 20;
-      else
-         intensity = 100;
-      intensity = IMIN(st->end,IMAX(st->start, intensity));
-   }
 
    /* Bit allocation */
    ALLOC(fine_quant, st->mode->nbEBands, int);
@@ -1501,7 +1805,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
    bits -= anti_collapse_rsv;
    codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
-         alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+         alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
          fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
    st->lastCodedBands = codedBands;
 
@@ -1521,7 +1825,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
    /* Residual quantisation */
    ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
    quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
-         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res,
+         bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
          nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
 
    if (anti_collapse_rsv > 0)
@@ -1873,6 +2177,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
          st->signalling = value;
       }
       break;
+      case CELT_SET_ANALYSIS_REQUEST:
+      {
+         AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+         if (info)
+            OPUS_COPY(&st->analysis, info, 1);
+      }
+      break;
       case CELT_GET_MODE_REQUEST:
       {
          const CELTMode ** value = va_arg(ap, const CELTMode**);
@@ -2860,6 +3171,7 @@ const char *opus_strerror(int error)
 const char *opus_get_version_string(void)
 {
     return "libopus " OPUS_VERSION
+          "-exp_analysis"
 #ifdef FIXED_POINT
           "-fixed"
 #endif
diff --git a/celt/celt.h b/celt/celt.h
index 218cd883..4c04ddba 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -50,7 +50,20 @@ extern "C" {
 #define CELTDecoder OpusCustomDecoder
 #define CELTMode OpusCustomMode
 
-#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+typedef struct {
+   int valid;
+   opus_val16 tonality;
+   opus_val16 tonality_slope;
+   opus_val16 noisiness;
+   opus_val16 activity;
+   int boost_band[2];
+   opus_val16 boost_amount[2];
+   opus_val16 music_prob;
+}AnalysisInfo;
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
 
 /* Encoder/decoder Requests */
 
@@ -81,11 +94,18 @@ extern "C" {
 
 #define CELT_GET_MODE_REQUEST    10015
 /** Get the CELTMode used by an encoder or decoder */
-#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x)
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
 
 #define CELT_SET_SIGNALLING_REQUEST    10016
 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
 
+#define CELT_SET_TONALITY_REQUEST    10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST    10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST    10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
 
 
 /* Encoder stuff */
diff --git a/celt/mathops.h b/celt/mathops.h
index 4e977956..3c7486ad 100644
--- a/celt/mathops.h
+++ b/celt/mathops.h
@@ -43,6 +43,33 @@
 
 unsigned isqrt32(opus_uint32 _val);
 
+#ifndef OVERRIDE_CELT_MAXABS16
+static inline opus_val16 celt_maxabs16(const opus_val16 *x, int len)
+{
+   int i;
+   opus_val16 maxval = 0;
+   for (i=0;i<len;i++)
+      maxval = MAX16(maxval, ABS16(x[i]));
+   return maxval;
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   int i;
+   opus_val32 maxval = 0;
+   for (i=0;i<len;i++)
+      maxval = MAX32(maxval, ABS32(x[i]));
+   return maxval;
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
 #ifndef FIXED_POINT
 
 #define PI 3.141592653f
@@ -117,27 +144,6 @@ static inline opus_int16 celt_ilog2(opus_int32 x)
 }
 #endif
 
-#ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val16 celt_maxabs16(opus_val16 *x, int len)
-{
-   int i;
-   opus_val16 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX16(maxval, ABS16(x[i]));
-   return maxval;
-}
-#endif
-
-#ifndef OVERRIDE_CELT_MAXABS32
-static inline opus_val32 celt_maxabs32(opus_val32 *x, int len)
-{
-   int i;
-   opus_val32 maxval = 0;
-   for (i=0;i<len;i++)
-      maxval = MAX32(maxval, ABS32(x[i]));
-   return maxval;
-}
-#endif
 
 /** Integer log in base2. Defined for zero, but not for negative numbers */
 static inline opus_int16 celt_zlog2(opus_val32 x)
diff --git a/celt/mdct.c b/celt/mdct.c
index 16a36c69..b300b2b5 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -109,12 +109,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    int N, N2, N4;
    kiss_twiddle_scalar sine;
    VARDECL(kiss_fft_scalar, f);
+   VARDECL(kiss_fft_scalar, f2);
    SAVE_STACK;
    N = l->n;
    N >>= shift;
    N2 = N>>1;
    N4 = N>>2;
    ALLOC(f, N2, kiss_fft_scalar);
+   ALLOC(f2, N2, kiss_fft_scalar);
    /* sin(x) ~= x here */
 #ifdef FIXED_POINT
    sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -180,12 +182,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
    }
 
    /* N/4 complex FFT, down-scales by 4/N */
-   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in);
+   opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
 
    /* Post-rotate */
    {
       /* Temp pointers to make it really clear to the compiler what we're doing */
-      const kiss_fft_scalar * OPUS_RESTRICT fp = in;
+      const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
       kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
       kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
       const kiss_twiddle_scalar *t = &l->trig[0];
diff --git a/celt/pitch.c b/celt/pitch.c
index c2f08ec1..d9bba1b2 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
       int T1, T1b;
       opus_val16 g1;
       opus_val16 cont=0;
+      opus_val16 thresh;
       T1 = (2*T0+k)/(2*k);
       if (T1 < minperiod)
          break;
@@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
          cont = HALF32(prev_gain);
       else
          cont = 0;
-      if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+      thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
+      /* Bias against very high pitch (very short period) to avoid false-positives
+         due to short-term correlation. The tighter bound is tested first so it is reachable. */
+      if (T1<2*minperiod)
+         thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
+      else if (T1<3*minperiod)
+         thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
+      if (g1 > thresh)
       {
          best_xy = xy;
          best_yy = yy;
diff --git a/celt/quant_bands.c b/celt/quant_bands.c
index b1d4eb15..241392f1 100644
--- a/celt/quant_bands.c
+++ b/celt/quant_bands.c
@@ -40,8 +40,8 @@
 #include "rate.h"
 
 #ifdef FIXED_POINT
-/* Mean energy in each band quantized in Q6 */
-static const signed char eMeans[25] = {
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
       103,100, 92, 85, 81,
        77, 72, 70, 78, 75,
        73, 71, 78, 74, 69,
@@ -49,8 +49,8 @@ static const signed char eMeans[25] = {
        60, 60, 60, 60, 60
 };
 #else
-/* Mean energy in each band quantized in Q6 and converted back to float */
-static const opus_val16 eMeans[25] = {
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
       6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
       4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
       4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
diff --git a/celt/quant_bands.h b/celt/quant_bands.h
index bec2855c..b3187fad 100644
--- a/celt/quant_bands.h
+++ b/celt/quant_bands.h
@@ -35,6 +35,12 @@
 #include "entdec.h"
 #include "mathops.h"
 
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
 void amp2Log2(const CELTMode *m, int effEnd, int end,
       celt_ener *bandE, opus_val16 *bandLogE, int C);
 
diff --git a/opus_headers.mk b/opus_headers.mk
index f160710c..43a978cd 100644
--- a/opus_headers.mk
+++ b/opus_headers.mk
@@ -1,4 +1,7 @@
 OPUS_HEAD = \
 include/opus.h \
 include/opus_multistream.h \
-src/opus_private.h
+src/opus_private.h \
+src/analysis.h \
+src/mlp.h \
+src/tansig_table.h
diff --git a/opus_sources.mk b/opus_sources.mk
index 384b036a..81eaef06 100644
--- a/opus_sources.mk
+++ b/opus_sources.mk
@@ -3,3 +3,8 @@ src/opus_decoder.c \
 src/opus_encoder.c \
 src/opus_multistream.c \
 src/repacketizer.c
+
+OPUS_SOURCES_FLOAT = \
+src/analysis.c \
+src/mlp.c \
+src/mlp_data.c
diff --git a/src/analysis.c b/src/analysis.c
new file mode 100644
index 00000000..6742a85c
--- /dev/null
+++ b/src/analysis.c
@@ -0,0 +1,461 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+#include "analysis.h"
+#include "mlp.h"
+
+extern const MLP net;
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+static const float dct_table[128] = {
+        0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+        0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+        0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
+        -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+        0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+        -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
+        0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+        0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
+        0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+        0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+        0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
+        -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
+        0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
+        -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
+        0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
+        0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
+};
+
+static const float analysis_window[240] = {
+      0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
+      0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
+      0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
+      0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
+      0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
+      0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
+      0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
+      0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
+      0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
+      0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
+      0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
+      0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
+      0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
+      0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
+      0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
+      0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
+      0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
+      0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
+      0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
+      0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
+      0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
+      0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
+      0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
+      0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
+      0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
+      0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
+      0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
+      0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
+      0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
+      0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
+};
+
+static const int tbands[NB_TBANDS+1] = {
+       2,  4,  6,  8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+};
+
+/*static const float tweight[NB_TBANDS+1] = {
+      .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
+};*/
+
+#define NB_TONAL_SKIP_BANDS 9
+
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE (M_PI/2)
+static inline float fast_atan2f(float y, float x) {
+   float x2, y2;
+   /* Should avoid underflow on the values we'll get */
+   if (ABS16(x)+ABS16(y)<1e-9)
+   {
+      x*=1e12;
+      y*=1e12;
+   }
+   x2 = x*x;
+   y2 = y*y;
+   if(x2<y2){
+      float den = (y2 + cB*x2) * (y2 + cC*x2);
+      if (den!=0)
+         return -x*y*(y2 + cA*x2) / den + copysignf(cE,y);
+      else
+         return copysignf(cE,y);
+   }else{
+      float den = (x2 + cB*y2) * (x2 + cC*y2);
+      if (den!=0)
+         return  x*y*(x2 + cA*y2) / den + copysignf(cE,y) - copysignf(cE,x*y);
+      else
+         return copysignf(cE,y) - copysignf(cE,x*y);
+   }
+}
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
+{
+    int i, b;
+    const CELTMode *mode;
+    const kiss_fft_state *kfft;
+    kiss_fft_cpx in[480], out[480];
+    int N = 480, N2=240;
+    float * OPUS_RESTRICT A = tonal->angle;      /* per-bin phase stored by the previous call */
+    float * OPUS_RESTRICT dA = tonal->d_angle;   /* per-bin phase difference stored by the previous call */
+    float * OPUS_RESTRICT d2A = tonal->d2_angle; /* per-bin mod2 term stored by the previous call */
+    float tonality[240];
+    float noisiness[240];
+    float band_tonality[NB_TBANDS];
+    float logE[NB_TBANDS];
+    float BFCC[8];
+    float features[100];
+    float frame_tonality;
+    float max_frame_tonality;
+    float tw_sum=0;
+    float frame_noisiness;
+    const float pi4 = M_PI*M_PI*M_PI*M_PI;
+    float slope=0;
+    float frame_stationarity;
+    float relativeE;
+    float frame_prob;
+    float alpha, alphaE, alphaE2;
+    float frame_loudness;
+    float bandwidth_mask;
+    int bandwidth=0;
+    float bandE[NB_TBANDS];
+    celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
+
+    tonal->last_transition++;
+    alpha = 1.f/IMIN(20, 1+tonal->count);
+    alphaE = 1.f/IMIN(50, 1+tonal->count);
+    alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
+
+    if (tonal->count<4)
+       tonal->music_prob = .5;
+    kfft = mode->mdct.kfft[0];
+    if (C==1)
+    {
+       for (i=0;i<N2;i++)
+       {
+          float w = analysis_window[i];
+          in[i].r = MULT16_16(w, x[i]);
+          in[i].i = MULT16_16(w, x[N-N2+i]);
+          in[N-i-1].r = MULT16_16(w, x[N-i-1]);
+          in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
+       }
+    } else {
+       for (i=0;i<N2;i++)
+       {
+          float w = analysis_window[i];
+          in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
+          in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
+          in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
+          in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
+       }
+    }
+    opus_fft(kfft, in, out);
+
+    for (i=1;i<N2;i++)
+    {
+       float X1r, X2r, X1i, X2i;
+       float angle, d_angle, d2_angle;
+       float angle2, d_angle2, d2_angle2;
+       float mod1, mod2, avg_mod;
+       X1r = out[i].r+out[N-i].r;
+       X1i = out[i].i-out[N-i].i;
+       X2r = out[i].i+out[N-i].i;
+       X2i = out[N-i].r-out[i].r;
+
+       angle = (.5/M_PI)*fast_atan2f(X1i, X1r);
+       d_angle = angle - A[i];
+       d2_angle = d_angle - dA[i];
+
+       angle2 = (.5/M_PI)*fast_atan2f(X2i, X2r);
+       d_angle2 = angle2 - angle;
+       d2_angle2 = d_angle2 - d_angle;
+
+       mod1 = d2_angle - floor(.5+d2_angle);
+       noisiness[i] = fabs(mod1);
+       mod1 *= mod1;
+       mod1 *= mod1;
+
+       mod2 = d2_angle2 - floor(.5+d2_angle2);
+       noisiness[i] += fabs(mod2);
+       mod2 *= mod2;
+       mod2 *= mod2;
+
+       avg_mod = .25*(d2A[i]+2*mod1+mod2);
+       tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015;
+
+       A[i] = angle2;
+       dA[i] = d_angle2;
+       d2A[i] = mod2;
+    }
+
+    frame_tonality = 0;
+    max_frame_tonality = 0;
+    tw_sum = 0;
+    info->activity = 0;
+    frame_noisiness = 0;
+    frame_stationarity = 0;
+    if (!tonal->count)
+    {
+       for (b=0;b<NB_TBANDS;b++)
+       {
+          tonal->lowE[b] = 1e10;
+          tonal->highE[b] = -1e10;
+       }
+    }
+    relativeE = 0;
+    info->boost_amount[0]=info->boost_amount[1]=0;
+    info->boost_band[0]=info->boost_band[1]=0;
+    frame_loudness = 0;
+    bandwidth_mask = 0;
+    for (b=0;b<NB_TBANDS;b++)
+    {
+       float E=0, tE=0, nE=0;
+       float L1, L2;
+       float stationarity;
+       for (i=tbands[b];i<tbands[b+1];i++)
+       {
+          float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
+                     + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+          E += binE;
+          tE += binE*tonality[i];
+          nE += binE*2*(.5-noisiness[i]);
+       }
+       bandE[b] = E;
+       tonal->E[tonal->E_count][b] = E;
+       frame_noisiness += nE/(1e-15+E);
+
+       frame_loudness += sqrt(E+1e-10);
+       /* Add a reasonable noise floor */
+       tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
+       tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
+       /* 13 dB slope for spreading function */
+       bandwidth_mask = MAX32(.05*bandwidth_mask, E);
+       /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
+       if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
+          bandwidth = b;
+       logE[b] = log(E+1e-10);
+       tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
+       tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
+       if (tonal->highE[b] < tonal->lowE[b]+1)
+       {
+          tonal->highE[b]+=.5;
+          tonal->lowE[b]-=.5;
+       }
+       relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+
+       L1=L2=0;
+       for (i=0;i<NB_FRAMES;i++)
+       {
+          L1 += sqrt(tonal->E[i][b]);
+          L2 += tonal->E[i][b];
+       }
+
+       stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2));
+       stationarity *= stationarity;
+       stationarity *= stationarity;
+       frame_stationarity += stationarity;
+       /*band_tonality[b] = tE/(1e-15+E)*/;
+       band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+#if 0
+       if (b>=NB_TONAL_SKIP_BANDS)
+       {
+          frame_tonality += tweight[b]*band_tonality[b];
+          tw_sum += tweight[b];
+       }
+#else
+       frame_tonality += band_tonality[b];
+       if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
+          frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
+#endif
+       max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
+       slope += band_tonality[b]*(b-8);
+       /*printf("%f %f ", band_tonality[b], stationarity);*/
+       if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
+       {
+          if (band_tonality[b] > info->boost_amount[0])
+          {
+             info->boost_amount[1] = info->boost_amount[0];
+             info->boost_band[1] = info->boost_band[0];
+             info->boost_amount[0] = band_tonality[b];
+             info->boost_band[0] = b;
+          } else {
+             info->boost_amount[1] = band_tonality[b];
+             info->boost_band[1] = b;
+          }
+       }
+       tonal->prev_band_tonality[b] = band_tonality[b];
+    }
+
+    frame_loudness = 20*log10(frame_loudness);
+    tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
+    tonal->lowECount *= (1-alphaE);
+    if (frame_loudness < tonal->Etracker-30)
+       tonal->lowECount += alphaE;
+
+    for (i=0;i<8;i++)
+    {
+       float sum=0;
+       for (b=0;b<16;b++)
+          sum += dct_table[i*16+b]*logE[b];
+       BFCC[i] = sum;
+    }
+
+    frame_stationarity /= NB_TBANDS;
+    relativeE /= NB_TBANDS;
+    if (tonal->count<10)
+       relativeE = .5;
+    frame_noisiness /= NB_TBANDS;
+#if 1
+    info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
+#else
+    info->activity = .5*(1+frame_noisiness-frame_stationarity);
+#endif
+    frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
+    frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
+    tonal->prev_tonality = frame_tonality;
+    info->boost_amount[0] -= frame_tonality+.2;
+    info->boost_amount[1] -= frame_tonality+.2;
+    if (info->boost_band[0]<1 || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15
+        || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15)
+       info->boost_amount[0]=0; /* band 0 means no band was selected: no boost, and never read band_tonality[-1] */
+    if (info->boost_band[1]<1 || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15
+        || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15)
+       info->boost_amount[1]=0; /* same guard for the second candidate; a boost must exceed both neighbours by .15 */
+
+    slope /= 8*8;
+    info->tonality_slope = slope;
+
+    tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+    tonal->count++;
+    info->tonality = frame_tonality;
+
+    for (i=0;i<4;i++)
+       features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i];
+
+    for (i=0;i<4;i++)
+       tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
+
+    for (i=0;i<4;i++)
+        features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]);
+    for (i=0;i<3;i++)
+        features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8];
+
+    if (tonal->count > 5)
+    {
+       for (i=0;i<9;i++)
+          tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
+    }
+
+    for (i=0;i<8;i++)
+    {
+       tonal->mem[i+24] = tonal->mem[i+16];
+       tonal->mem[i+16] = tonal->mem[i+8];
+       tonal->mem[i+8] = tonal->mem[i];
+       tonal->mem[i] = BFCC[i];
+    }
+    for (i=0;i<9;i++)
+       features[11+i] = sqrt(tonal->std[i]);
+    features[20] = info->tonality;
+    features[21] = info->activity;
+    features[22] = frame_stationarity;
+    features[23] = info->tonality_slope;
+    features[24] = tonal->lowECount;
+
+#ifndef FIXED_POINT
+    mlp_process(&net, features, &frame_prob);
+    /* Adds a "probability dead zone", with a cap on certainty */
+    frame_prob = .90*frame_prob*frame_prob*frame_prob;
+
+    frame_prob = .5*(frame_prob+1);
+
+    /*printf("%f\n", frame_prob);*/
+    {
+       float tau, beta;
+       float p0, p1;
+       float max_certainty;
+       /* One transition every 3 minutes */
+       tau = .00005;
+       beta = .1;
+       max_certainty = 1.f/(10+1*tonal->last_transition);
+       p0 = (1-tonal->music_prob)*(1-tau) +    tonal->music_prob *tau;
+       p1 =    tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+       p0 *= pow(1-frame_prob, beta);
+       p1 *= pow(frame_prob, beta);
+       tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
+       info->music_prob = tonal->music_prob;
+       /*printf("%f %f\n", frame_prob, info->music_prob);*/
+    }
+    if (tonal->last_music != (tonal->music_prob>.5))
+       tonal->last_transition=0;
+    tonal->last_music = tonal->music_prob>.5;
+#else
+    info->music_prob = 0;
+#endif
+    /*for (i=0;i<25;i++)
+       printf("%f ", features[i]);
+    printf("\n");*/
+
+    /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
+    if (bandwidth == NB_TBANDS-1 || tonal->count<100)
+    {
+       tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+    } else {
+       int close_enough = 0; /* only consulted below for bandwidth 12 and 16 */
+       if (bandwidth>0 && bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
+          close_enough=1; /* the bandwidth>0 guard keeps bandE[bandwidth-1] in bounds */
+       if (bandwidth<=11 || (bandwidth==12 && close_enough))
+          tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+       else if (bandwidth<=13)
+          tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+       else if (bandwidth<=15 || (bandwidth==16 && close_enough))
+          tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+    }
+    info->noisiness = frame_noisiness;
+    info->valid = 1;
+}
diff --git a/src/analysis.h b/src/analysis.h
new file mode 100644
index 00000000..09d1036a
--- /dev/null
+++ b/src/analysis.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#define NB_FRAMES 8
+#define NB_TBANDS 18
+
+
+typedef struct {
+   float angle[240];
+   float d_angle[240];
+   float d2_angle[240];
+   float prev_band_tonality[NB_TBANDS];
+   float prev_tonality;
+   float E[NB_FRAMES][NB_TBANDS];
+   float lowE[NB_TBANDS], highE[NB_TBANDS];
+   float meanE[NB_TBANDS], meanRE[NB_TBANDS];
+   float mem[32];
+   float cmean[8];
+   float std[9];
+   float music_prob;      /* running music probability; clamped and copied to info->music_prob by tonality_analysis() */
+   float Etracker;
+   float lowECount;
+   int E_count;
+   int last_music;        /* previous frame's (music_prob>.5) decision */
+   int last_transition;   /* reset to 0 when that decision flips; sets the clamp bound 1/(10+last_transition) */
+   int count;             /* while below 100, opus_bandwidth is forced to fullband */
+   int opus_bandwidth;    /* one of the OPUS_BANDWIDTH_* values chosen by tonality_analysis() */
+} TonalityAnalysisState;
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
+     CELTEncoder *celt_enc, const opus_val16 *x, int C);
+
+#endif
diff --git a/src/mlp.c b/src/mlp.c
new file mode 100644
index 00000000..dd3690db
--- /dev/null
+++ b/src/mlp.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <math.h>
+#include "mlp.h"
+#include "arch.h"
+#include "tansig_table.h"
+#define MAX_NEURONS 100
+
+#ifdef FIXED_POINT
+extern const opus_val16 tansig_table[501];
+static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+{
+	int i;
+	opus_val16 xx; /* Q11 */
+	/* Table lookup plus a correction term; the commented-out lines give the floating-point form of each step. */
+	opus_val16 dy, yy; /* Q14 */
+	/*x = 1.9073e-06*_x;*/
+	if (_x>=QCONST32(10,19)) /* inputs at or beyond +/-10 saturate */
+		return QCONST32(1.,14);
+	if (_x<=-QCONST32(10,19))
+		return -QCONST32(1.,14);
+	xx = EXTRACT16(SHR32(_x, 8));
+	/*i = lrint(25*x);*/
+	i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+	/*x -= .04*i;*/
+	xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+	/*x = xx*(1./2048);*/
+	/*y = tansig_table[250+i];*/
+	yy = tansig_table[250+i];
+	/*y = yy*(1./16384);*/
+	dy = 16384-MULT16_16_Q14(yy,yy); /* 1 - y*y, with 16384 standing for 1.0 */
+	yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+	return yy;
+}
+#else
+/* tanh approximation for |x|<8: folds the sign, reads tansig_table[0..200] (step .04) and adds a correction term. */
+static inline double tansig_approx(double x)
+{
+	int i;
+	double y, dy;
+	double sign=1;
+	if (!(x<8)) /* negated test: NaN also saturates here instead of reaching lrint() and the table index */
+		return 1;
+	if (!(x>-8))
+		return -1;
+	if (x<0)
+	{
+	   x=-x;
+	   sign=-1;
+	}
+	i = lrint(25*x); /* nearest table entry; x<8 keeps i <= 200 */
+	x -= .04*i;      /* residual distance from that entry */
+	y = tansig_table[i];
+	dy = 1-y*y;      /* derivative of tanh at the table point */
+	y = y + x*dy*(1 - y*x);
+	return sign*y;
+}
+#endif
+
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+{
+	int j;
+	opus_val16 hidden[MAX_NEURONS]; /* topo[1] entries are written; there is no check against MAX_NEURONS */
+	const opus_val16 *W = m->weights;
+	/* Hidden layer: each neuron consumes one bias followed by topo[0] input weights */
+	for (j=0;j<m->topo[1];j++)
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),8);
+		for (k=0;k<m->topo[0];k++)
+			sum = MAC16_16(sum, in[k],*W++);
+		hidden[j] = tansig_approx(sum);
+	}
+	for (j=0;j<m->topo[2];j++) /* output layer: one bias followed by topo[1] hidden-unit weights */
+	{
+		int k;
+		opus_val32 sum = SHL32(EXTEND32(*W++),14);
+		for (k=0;k<m->topo[1];k++)
+			sum = MAC16_16(sum, hidden[k], *W++);
+		out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17)));
+	}
+}
+
diff --git a/src/mlp.h b/src/mlp.h
new file mode 100644
index 00000000..68ff68d8
--- /dev/null
+++ b/src/mlp.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_H_
+#define _MLP_H_
+
+#include "arch.h"
+
+typedef struct {
+	int layers;                /* mlp_process() does not read this field */
+	const int *topo;           /* topo[0..2]: input, hidden and output sizes as used by mlp_process() */
+	const opus_val16 *weights; /* read sequentially: per neuron one bias then its input weights, hidden layer first */
+} MLP;
+
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+
+#endif /* _MLP_H_ */
diff --git a/src/mlp_data.c b/src/mlp_data.c
new file mode 100644
index 00000000..dbc7cec3
--- /dev/null
+++ b/src/mlp_data.c
@@ -0,0 +1,73 @@
+#include "mlp.h"
+
+/* RMS error was 0.179835, seed was 1322103961 */
+
+static const float weights[271] = {
+
+/* hidden layer */
+1.55597, -0.0739792, -0.0646761, -0.099531, -0.0794943, 
+0.0180174, -0.0391354, 0.0508224, -0.0160169, -0.0773263, 
+-0.0300002, -0.0865361, 0.124477, -0.28648, -0.0860702, 
+-0.518949, -0.0873341, -0.235393, -0.907833, -0.383573, 
+0.535388, -0.57944, 0.98116, 0.8482, 1.12426, 
+-3.23721, -0.647072, -0.0265139, 0.0711052, -0.00125666, 
+-0.0396181, -0.44282, -0.510495, -0.201865, 0.0134336, 
+-0.167205, -0.155406, 0.00041678, -0.00468705, -0.0233224, 
+0.264279, -0.301375, 0.00234895, 0.0144741, -0.137535, 
+0.200323, 0.0192027, 3.19818, 2.03495, 0.705517, 
+-4.6025, -0.11485, -0.792716, 0.150714, 0.10608, 
+0.240633, 0.0690698, 0.0695297, 0.124819, 0.0501433, 
+0.0460952, 0.147639, 0.10327, 0.158007, 0.113714, 
+0.0276191, 0.0680749, -0.130012, 0.0796126, 0.133067, 
+0.51495, 0.747578, -0.128742, 5.98112, -1.16698, 
+-0.276492, -1.73549, -3.90234, 2.01489, -0.040118, 
+-0.113002, -0.146751, -0.113569, 0.0534873, 0.0989832, 
+0.0872875, 0.049266, 0.0367557, -0.00889148, -0.0648461, 
+-0.00190352, 0.0143773, 0.0259364, -0.0592133, -0.0672924, 
+0.1399, -0.0987886, -0.347402, 0.101326, -0.0680876, 
+0.469186, 0.246922, 10.4017, 3.44846, -0.662725, 
+-0.0328208, -0.0561274, -0.0167744, 0.00044282, -0.0457645, 
+-0.0408314, -0.013113, -0.0373873, -0.0474122, -0.0273745, 
+-0.0308505, 0.000582959, -0.0421135, 0.464859, 0.196842, 
+0.320538, 0.0435528, -0.200168, 0.266475, -0.0853727, 
+1.20397, 0.711542, -1.04397, -1.47759, 1.26768, 
+0.446958, 0.266477, -0.30802, 0.28431, -0.118541, 
+0.00836345, 0.0689026, -0.0137996, -0.0395417, 0.26982, 
+-0.206255, 0.16066, 0.114757, 0.359587, -0.106503, 
+-0.0948534, 0.175358, -0.122966, -0.0056675, 0.483848, 
+-0.134916, -0.427567, -0.140172, -1.0866, -2.73921, 
+0.549843, 0.17685, 0.0010675, -0.00137386, 0.0884424, 
+-0.0698736, -0.00174136, 0.0718775, -0.0396849, 0.0448056, 
+0.0577853, -0.0372353, 0.134599, 0.0260656, 0.140322, 
+0.22704, -0.020568, -0.0142424, -0.21723, -0.997704, 
+-0.884573, -0.163495, 2.33617, 0.224142, 0.19635, 
+-0.957387, 0.144678, 1.47035, -0.00700498, -0.0472309, 
+-0.0137848, -0.0189145, 0.00856479, 0.0316965, 0.00613373, 
+0.00209807, 0.00270964, -0.0490206, 0.0105712, -0.0465045, 
+-0.0381532, -0.0985268, -0.108297, 0.0146409, -0.0040718, 
+-0.0698572, -0.380568, -0.230479, 3.98917, 0.457652, 
+-1.02355, -7.4435, -0.475314, 1.61743, 0.0254017, 
+-0.00791293, 0.047217, 0.0220995, -0.0304311, 0.0052168, 
+-0.0404054, -0.0230293, 0.00169229, -0.0138178, 0.0043137, 
+-0.0598088, -0.133601, 0.0555138, -0.177358, -0.159856, 
+-0.137281, 0.108051, -0.305973, 0.393775, 0.0747287, 
+0.783993, -0.875086, 1.06862, 0.340519, -0.352681, 
+-0.0830912, -0.100017, 0.0729085, -0.00829403, 0.027489, 
+-0.0779597, 0.082286, -0.164181, -0.41519, 0.00282335, 
+-0.29573, 0.125571, 0.726935, 0.392137, 0.491348, 
+0.0723196, -0.0259758, -0.0636332, -0.452384, -0.000225974, 
+-2.34001, 2.45211, -0.544628, 5.62944, -3.44507, 
+
+/* output layer */
+-3.13835, 0.994751, 0.444901, 1.59518, 1.23665, 
+3.37012, -1.34606, 1.99131, 1.33476, 1.3885, 
+1.12559, };
+
+static const int topo[3] = {25, 10, 1}; /* input, hidden, output sizes: (25+1)*10 + (10+1)*1 = 271 weights */
+
+const MLP net = {
+	3, /* layers */
+	topo,
+	weights
+};
+
diff --git a/src/mlp_train.c b/src/mlp_train.c
new file mode 100644
index 00000000..6421c17d
--- /dev/null
+++ b/src/mlp_train.c
@@ -0,0 +1,496 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "mlp_train.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <time.h>
+#include <signal.h>
+
+volatile sig_atomic_t stopped = 0; /* set from handler(); polled once per epoch by the training loop */
+
+void handler(int sig)
+{
+	stopped = 1;
+	signal(sig, handler); /* re-install, in case the platform resets the disposition on delivery */
+}
+
+/* Allocates an MLPTrain for topo[] and seeds its first and last layers: random weights,
+   hidden biases that cancel the mean input, output biases equal to the mean target. */
+MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples)
+{
+	int i, j, k;
+	MLPTrain *net;
+	int inDim = topo[0], outDim = topo[nbLayers-1];
+	net = malloc(sizeof(*net));
+	net->layers = nbLayers; /* previously left uninitialised */
+	net->topo = malloc(nbLayers*sizeof(net->topo[0]));
+	memcpy(net->topo, topo, nbLayers*sizeof(net->topo[0]));
+	net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0]));
+	net->weights = malloc((nbLayers-1)*sizeof(net->weights[0]));
+	net->best_weights = malloc((nbLayers-1)*sizeof(net->best_weights[0]));
+	for (i=0;i<nbLayers-1;i++)
+	{
+		net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
+		net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
+	}
+	double inMean[inDim];
+	for (j=0;j<inDim;j++)
+	{
+		double std=0;
+		inMean[j] = 0;
+		for (i=0;i<nbSamples;i++)
+		{
+			inMean[j] += inputs[i*inDim+j];
+			std += inputs[i*inDim+j]*inputs[i*inDim+j];
+		}
+		inMean[j] /= nbSamples;
+		std /= nbSamples; /* mean square of input j at this point */
+		net->in_rate[1+j] = .5/(.0001+std);
+		std = std-inMean[j]*inMean[j]; /* now the variance, floored below */
+		if (std<.001)
+			std = .001;
+		std = 1/sqrt(inDim*std);
+		for (k=0;k<topo[1];k++)
+			net->weights[0][k*(topo[0]+1)+j+1] = randn(std);
+	}
+	net->in_rate[0] = 1; /* slot 0 scales the bias rate */
+	for (j=0;j<topo[1];j++)
+	{
+		double sum = 0;
+		for (k=0;k<inDim;k++)
+			sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1];
+		net->weights[0][j*(topo[0]+1)] = -sum; /* bias cancels the response to the mean input */
+	}
+	for (j=0;j<outDim;j++)
+	{
+		double mean = 0;
+		double std;
+		for (i=0;i<nbSamples;i++)
+			mean += outputs[i*outDim+j];
+		mean /= nbSamples;
+		std = 1/sqrt(topo[nbLayers-2]);
+		net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean;
+		for (k=0;k<topo[nbLayers-2];k++)
+			net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std);
+	}
+	return net;
+}
+
+#define MAX_NEURONS 100
+
+double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
+{
+	int i,j;
+	int s;
+	int inDim, outDim, hiddenDim;
+	int *topo;
+	double *W0, *W1;
+	double rms=0;
+	int W0_size, W1_size;
+	double hidden[MAX_NEURONS];
+	double netOut[MAX_NEURONS];
+	double error[MAX_NEURONS];
+
+        *error_rate = 0;
+	topo = net->topo;
+	inDim = net->topo[0];
+	hiddenDim = net->topo[1];
+	outDim = net->topo[2];
+	W0_size = (topo[0]+1)*topo[1];
+	W1_size = (topo[1]+1)*topo[2];
+	W0 = net->weights[0];
+	W1 = net->weights[1];
+	memset(W0_grad, 0, W0_size*sizeof(double)); /* both gradient buffers are accumulated from zero */
+	memset(W1_grad, 0, W1_size*sizeof(double));
+	for (i=0;i<outDim;i++)
+		netOut[i] = outputs[i]; /* overwritten by the forward pass of every sample below */
+	for (s=0;s<nbSamples;s++)
+	{
+		float *in, *out;
+		in = inputs+s*inDim;
+		out = outputs + s*outDim;
+		for (i=0;i<hiddenDim;i++) /* forward pass, hidden layer: bias at index 0 of each row */
+		{
+			double sum = W0[i*(inDim+1)];
+			for (j=0;j<inDim;j++)
+				sum += W0[i*(inDim+1)+j+1]*in[j];
+			hidden[i] = tansig_approx(sum);
+		}
+		for (i=0;i<outDim;i++) /* forward pass, output layer, plus error statistics */
+		{
+			double sum = W1[i*(hiddenDim+1)];
+			for (j=0;j<hiddenDim;j++)
+				sum += W1[i*(hiddenDim+1)+j+1]*hidden[j];
+			netOut[i] = tansig_approx(sum);
+			error[i] = out[i] - netOut[i];
+			rms += error[i]*error[i];
+			*error_rate += fabs(error[i])>1; /* counts outputs that miss the target by more than 1 */
+			/*error[i] = error[i]/(1+fabs(error[i]));*/
+		}
+		/* Back-propagate error */
+		for (i=0;i<outDim;i++)
+		{
+                        float grad = 1-netOut[i]*netOut[i];
+			W1_grad[i*(hiddenDim+1)] += error[i]*grad;
+			for (j=0;j<hiddenDim;j++)
+				W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j];
+		}
+		for (i=0;i<hiddenDim;i++)
+		{
+			double grad;
+			grad = 0;
+			for (j=0;j<outDim;j++)
+				grad += error[j]*W1[j*(hiddenDim+1)+i+1]; /* NOTE(review): raw error[j], without the (1-netOut[j]^2) factor used for W1_grad — confirm intended */
+			grad *= 1-hidden[i]*hidden[i];
+			W0_grad[i*(inDim+1)] += grad;
+			for (j=0;j<inDim;j++)
+				W0_grad[i*(inDim+1)+j+1] += grad*in[j];
+		}
+	}
+	return rms; /* sum of squared errors over all samples and outputs; the caller normalises it */
+}
+
+#define NB_THREADS 8
+
+sem_t sem_begin[NB_THREADS];
+sem_t sem_end[NB_THREADS];
+
+struct GradientArg {
+	int id;            /* index into sem_begin[] / sem_end[] */
+	int done;          /* set by the trainer before the final sem_post to make the worker exit */
+	MLPTrain *net;
+	float *inputs;     /* first input of this worker's slice of the training set */
+	float *outputs;    /* first target of this worker's slice */
+	int nbSamples;     /* number of samples in the slice */
+	double *W0_grad;   /* set by the worker to arrays on its own stack */
+	double *W1_grad;
+	double rms;        /* return value of the last compute_gradient() call */
+	double error_rate; /* error count written by the last compute_gradient() call */
+};
+
+/* Worker thread: computes the gradient of its slice each time sem_begin[id] is posted, until done is set. */
+void *gradient_thread_process(void *_arg)
+{
+	struct GradientArg *job = _arg;
+	const int *dims = job->net->topo;
+	/* Gradient accumulators live on this thread's stack and are published through job. */
+	double hidden_grad[(dims[0]+1)*dims[1]];
+	double output_grad[(dims[1]+1)*dims[2]];
+	job->W0_grad = hidden_grad;
+	job->W1_grad = output_grad;
+	for (;;)
+	{
+		sem_wait(&sem_begin[job->id]); /* wait for the next epoch or the shutdown request */
+		if (job->done)
+			break;
+		job->rms = compute_gradient(job->net, job->inputs, job->outputs, job->nbSamples,
+		                            job->W0_grad, job->W1_grad, &job->error_rate);
+		sem_post(&sem_end[job->id]);
+	}
+	fprintf(stderr, "done\n");
+	return NULL;
+}
+
+/* Trains a 3-layer net with per-weight adaptive rates on NB_THREADS workers; returns the lowest mean squared error seen. */
+float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate)
+{
+	int i, j;
+	int e;
+	float best_rms = 1e10;
+	int inDim, outDim, hiddenDim;
+	int *topo;
+	double *W0, *W1, *best_W0, *best_W1;
+	double *W0_old, *W1_old;
+	double *W0_old2, *W1_old2;
+	double *W0_grad, *W1_grad;
+	double *W0_oldgrad, *W1_oldgrad;
+	double *W0_rate, *W1_rate;
+	double *best_W0_rate, *best_W1_rate;
+	int W0_size, W1_size;
+	topo = net->topo;
+	W0_size = (topo[0]+1)*topo[1];
+	W1_size = (topo[1]+1)*topo[2];
+	struct GradientArg args[NB_THREADS];
+	pthread_t thread[NB_THREADS];
+	int samplePerPart = nbSamples/NB_THREADS;
+	int count_worse=0;
+	int count_retries=0;
+
+	inDim = net->topo[0];
+	hiddenDim = net->topo[1];
+	outDim = net->topo[2];
+	W0 = net->weights[0];
+	W1 = net->weights[1];
+	best_W0 = net->best_weights[0];
+	best_W1 = net->best_weights[1];
+	W0_old = malloc(W0_size*sizeof(double));
+	W1_old = malloc(W1_size*sizeof(double));
+	W0_old2 = malloc(W0_size*sizeof(double));
+	W1_old2 = malloc(W1_size*sizeof(double));
+	W0_grad = malloc(W0_size*sizeof(double));
+	W1_grad = malloc(W1_size*sizeof(double));
+	W0_oldgrad = malloc(W0_size*sizeof(double));
+	W1_oldgrad = malloc(W1_size*sizeof(double));
+	W0_rate = malloc(W0_size*sizeof(double));
+	W1_rate = malloc(W1_size*sizeof(double));
+	best_W0_rate = malloc(W0_size*sizeof(double));
+	best_W1_rate = malloc(W1_size*sizeof(double));
+	memcpy(W0_old, W0, W0_size*sizeof(double));
+	memcpy(W0_old2, W0, W0_size*sizeof(double));
+	memset(W0_grad, 0, W0_size*sizeof(double));
+	memset(W0_oldgrad, 0, W0_size*sizeof(double));
+	memcpy(W1_old, W1, W1_size*sizeof(double));
+	memcpy(W1_old2, W1, W1_size*sizeof(double));
+	memset(W1_grad, 0, W1_size*sizeof(double));
+	memset(W1_oldgrad, 0, W1_size*sizeof(double));
+
+	rate /= nbSamples;
+	for (i=0;i<hiddenDim;i++)
+		for (j=0;j<inDim+1;j++)
+			W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j];
+	for (i=0;i<W1_size;i++)
+		W1_rate[i] = rate;
+
+	for (i=0;i<NB_THREADS;i++)
+	{
+		args[i].net = net;
+		args[i].inputs = inputs+i*samplePerPart*inDim;
+		args[i].outputs = outputs+i*samplePerPart*outDim;
+		args[i].nbSamples = samplePerPart;
+		args[i].id = i;
+		args[i].done = 0;
+		sem_init(&sem_begin[i], 0, 0);
+		sem_init(&sem_end[i], 0, 0);
+		pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]);
+	}
+	for (e=0;e<nbEpoch;e++)
+	{
+		double rms=0;
+		double error_rate = 0;
+		for (i=0;i<NB_THREADS;i++)
+		{
+			sem_post(&sem_begin[i]);
+		}
+		memset(W0_grad, 0, W0_size*sizeof(double));
+		memset(W1_grad, 0, W1_size*sizeof(double));
+		for (i=0;i<NB_THREADS;i++)
+		{
+			sem_wait(&sem_end[i]);
+			rms += args[i].rms;
+			error_rate += args[i].error_rate;
+			for (j=0;j<W0_size;j++)
+				W0_grad[j] += args[i].W0_grad[j];
+			for (j=0;j<W1_size;j++)
+				W1_grad[j] += args[i].W1_grad[j];
+		}
+
+		float mean_rate = 0, min_rate = 1e10;
+		rms = (rms/(outDim*nbSamples));
+		error_rate = (error_rate/(outDim*nbSamples));
+		fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);
+		if (rms < best_rms)
+		{
+			best_rms = rms;
+			for (i=0;i<W0_size;i++)
+			{
+				best_W0[i] = W0[i];
+				best_W0_rate[i] = W0_rate[i];
+			}
+			for (i=0;i<W1_size;i++)
+			{
+				best_W1[i] = W1[i];
+				best_W1_rate[i] = W1_rate[i];
+			}
+			count_worse=0;
+			count_retries=0;
+		} else {
+			count_worse++;
+			if (count_worse>30)
+			{
+				/* No improvement for 30 epochs: roll back to the best weights with reduced rates */
+				count_retries++; count_worse=0;
+				for (i=0;i<W0_size;i++)
+				{
+					W0[i] = best_W0[i];
+					best_W0_rate[i] *= .7;
+					if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15;
+					W0_rate[i] = best_W0_rate[i];
+					W0_grad[i] = 0;
+				}
+				for (i=0;i<W1_size;i++)
+				{
+					W1[i] = best_W1[i];
+					best_W1_rate[i] *= .8;
+					if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15;
+					W1_rate[i] = best_W1_rate[i];
+					W1_grad[i] = 0;
+				}
+			}
+		}
+		if (count_retries>10)
+			break;
+		for (i=0;i<W0_size;i++)
+		{
+			if (W0_oldgrad[i]*W0_grad[i] > 0)
+				W0_rate[i] *= 1.01;
+			else if (W0_oldgrad[i]*W0_grad[i] < 0)
+				W0_rate[i] *= .9;
+			mean_rate += W0_rate[i];
+			if (W0_rate[i] < min_rate)
+				min_rate = W0_rate[i];
+			if (W0_rate[i] < 1e-15)
+				W0_rate[i] = 1e-15;
+			/*if (W0_rate[i] > .01)
+				W0_rate[i] = .01;*/
+			W0_oldgrad[i] = W0_grad[i];
+			W0_old2[i] = W0_old[i];
+			W0_old[i] = W0[i];
+			W0[i] += W0_grad[i]*W0_rate[i];
+		}
+		for (i=0;i<W1_size;i++)
+		{
+			if (W1_oldgrad[i]*W1_grad[i] > 0)
+				W1_rate[i] *= 1.01;
+			else if (W1_oldgrad[i]*W1_grad[i] < 0)
+				W1_rate[i] *= .9;
+			mean_rate += W1_rate[i];
+			if (W1_rate[i] < min_rate)
+				min_rate = W1_rate[i];
+			if (W1_rate[i] < 1e-15)
+				W1_rate[i] = 1e-15;
+			W1_oldgrad[i] = W1_grad[i];
+			W1_old2[i] = W1_old[i];
+			W1_old[i] = W1[i];
+			W1[i] += W1_grad[i]*W1_rate[i];
+		}
+		mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2];
+		fprintf (stderr, "%g %d", mean_rate, e);
+		if (count_retries)
+			fprintf(stderr, " %d", count_retries);
+		fprintf(stderr, "\n");
+		if (stopped)
+			break;
+	}
+	for (i=0;i<NB_THREADS;i++)
+	{
+		args[i].done = 1;
+		sem_post(&sem_begin[i]);
+		pthread_join(thread[i], NULL);
+		fprintf (stderr, "joined %d\n", i);
+	}
+	free(W0_old); free(W1_old);
+	free(W0_old2); free(W1_old2);
+	free(W0_grad); free(W1_grad);
+	free(W0_oldgrad); free(W1_oldgrad);
+	free(W0_rate); free(W1_rate);
+	free(best_W0_rate); free(best_W1_rate);
+	return best_rms; /* NOTE(review): net->weights may hold a later iterate than the best set kept in net->best_weights */
+}
+
+int main(int argc, char **argv)
+{
+	int i, j;
+	int nbInputs;
+	int nbOutputs;
+	int nbHidden;
+	int nbSamples;
+	int nbEpoch;
+	int nbRealInputs;
+	unsigned int seed;
+	int ret;
+	float rms;
+	float *inputs;
+	float *outputs;
+	if (argc!=6)
+	{
+		fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n");
+		return 1;
+	}
+	nbInputs = atoi(argv[1]);
+	nbHidden = atoi(argv[2]);
+	nbOutputs = atoi(argv[3]);
+	nbSamples = atoi(argv[4]);
+	nbEpoch = atoi(argv[5]);
+	nbRealInputs = nbInputs;
+	inputs = malloc(nbInputs*nbSamples*sizeof(*inputs));
+	outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs));
+	if (inputs==NULL || outputs==NULL) { fprintf (stderr, "mlp_train: out of memory\n"); return 1; }
+	seed = time(NULL);
+	fprintf (stderr, "Seed is %u\n", seed);
+	srand(seed);
+	build_tansig_table();
+	signal(SIGTERM, handler);
+	signal(SIGINT, handler);
+	signal(SIGHUP, handler);
+	for (i=0;i<nbSamples;i++)
+	{
+		for (ret=1,j=0;j<nbRealInputs && ret==1;j++)
+			ret = scanf(" %f", &inputs[i*nbInputs+j]);
+		for (j=0;j<nbOutputs && ret==1;j++)
+			ret = scanf(" %f", &outputs[i*nbOutputs+j]);
+		if (ret != 1) /* EOF or unparsable token: keep only the complete samples read so far */
+		{
+			nbSamples = i;
+			break;
+		}
+	}
+	int topo[3] = {nbInputs, nbHidden, nbOutputs};
+	MLPTrain *net;
+	/* Train, then print the network on stdout as C source (an MLP named net with its weights and topo) */
+	fprintf (stderr, "Got %d samples\n", nbSamples);
+	net = mlp_init(topo, 3, inputs, outputs, nbSamples);
+	rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1);
+	printf ("#include \"mlp.h\"\n\n");
+	printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed);
+	printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]);
+	printf ("\n/* hidden layer */\n");
+	for (i=0;i<(topo[0]+1)*topo[1];i++)
+	{
+		printf ("%g, ", net->weights[0][i]);
+		if (i%5==4)
+			printf("\n");
+	}
+	printf ("\n/* output layer */\n");
+	for (i=0;i<(topo[1]+1)*topo[2];i++)
+	{
+		printf ("%g, ", net->weights[1][i]);
+		if (i%5==4)
+			printf("\n");
+	}
+	printf ("};\n\n");
+	printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]);
+	printf ("const MLP net = {\n");
+	printf ("\t3,\n");
+	printf ("\ttopo,\n");
+	printf ("\tweights\n};\n");
+	return 0;
+}
diff --git a/src/mlp_train.h b/src/mlp_train.h
new file mode 100644
index 00000000..1857f644
--- /dev/null
+++ b/src/mlp_train.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+   Written by Jean-Marc Valin */
+/*
+   Redistribution and use in source and binary forms, with or without
+   modification, are permitted provided that the following conditions
+   are met:
+
+   - Redistributions of source code must retain the above copyright
+   notice, this list of conditions and the following disclaimer.
+
+   - Redistributions in binary form must reproduce the above copyright
+   notice, this list of conditions and the following disclaimer in the
+   documentation and/or other materials provided with the distribution.
+
+   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+   ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+   A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
+   CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+   EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+   PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+   PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+   LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+   NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+   SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_TRAIN_H_
+#define _MLP_TRAIN_H_
+
+#include <math.h>
+#include <stdlib.h>
+
+double tansig_table[501]; /* tanh sampled every .04 over [-10,10]; filled by build_tansig_table() */
+static inline double tansig_double(double x) /* exact reference: 2/(1+exp(-2x)) - 1 */
+{
+	return 2./(1.+exp(-2.*x)) - 1.;
+}
+static inline void build_tansig_table(void) /* must run before the first tansig_approx() call */
+{
+	int i;
+	for (i=0;i<501;i++)
+		tansig_table[i] = tansig_double(.04*(i-250));
+}
+
+static inline double tansig_approx(double x) /* table lookup plus a correction term */
+{
+	int i;
+	double y, dy;
+	if (!(x<10)) /* negated test: NaN also saturates here instead of reaching lrint() and the table index */
+		return 1;
+	if (!(x>-10))
+		return -1;
+	i = lrint(25*x); /* nearest table entry, in [-250,250] */
+	x -= .04*i;      /* residual distance from that entry */
+	y = tansig_table[250+i];
+	dy = 1-y*y;      /* derivative of tanh at the table point */
+	y = y + x*dy*(1 - y*x);
+	return y;
+}
+
+static inline float randn(float sd) /* one normal deviate with standard deviation sd, drawn from rand() */
+{
+   float U1, U2, S, x;
+   do { /* rejection loop: keep a point strictly inside the unit circle, excluding the origin */
+      U1 = ((float)rand())/RAND_MAX;
+      U2 = ((float)rand())/RAND_MAX;
+      U1 = 2*U1-1;
+      U2 = 2*U2-1;
+      S = U1*U1 + U2*U2;
+   } while (S >= 1 || S == 0.0f);
+   x = sd*sqrt(-2 * log(S) / S) * U1;
+   return x;
+}
+
+
+typedef struct {
+	int layers;            /* presumably the number of entries in topo — confirm against users of this field */
+	int *topo;             /* size of each layer, input first */
+	double **weights;      /* weights[l]: (topo[l]+1)*topo[l+1] values, each neuron's bias first */
+	double **best_weights; /* same layout; snapshot taken by mlp_train_backprop() at its lowest error */
+	double *in_rate;       /* topo[0]+1 learning-rate scales for first-layer weights; index 0 is the bias */
+} MLPTrain;
+
+
+#endif /* _MLP_TRAIN_H_ */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index ea6bfa39..1e8f5972 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,7 +40,8 @@
 #include "arch.h"
 #include "opus_private.h"
 #include "os_support.h"
-
+#include "analysis.h"
+#include "mathops.h"
 #include "tuning_parameters.h"
 #ifdef FIXED_POINT
 #include "fixed/structs_FIX.h"
@@ -84,7 +85,9 @@ struct OpusEncoder {
     /* Sampling rate (at the API level) */
     int          first;
     opus_val16   delay_buffer[MAX_ENCODER_BUFFER*2];
-
+#ifndef FIXED_POINT
+    TonalityAnalysisState analysis;
+#endif
     opus_uint32  rangeFinal;
 };
 
@@ -365,6 +368,56 @@ static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou
 #endif
 }
 
+#ifdef FIXED_POINT
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i; /* in/out are interleaved; hp_mem holds two state words per channel */
+   int shift;
+
+   /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
+   shift=celt_ilog2(Fs/(cutoff_Hz*3));
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = SHL32(EXTEND32(in[channels*i+c]), 15); /* 15 extra fractional bits, removed again on output */
+         /* First stage: subtract the running estimate, then move it toward x by 2^-shift of the difference */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
+         /* Second stage: same operation applied to the first stage's output */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
+         out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767));
+      }
+   }
+}
+
+#else
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int c, i; /* in/out are interleaved; hp_mem holds two state words per channel */
+   float coef;
+
+   coef = 4.*cutoff_Hz/Fs; /* fraction of the difference by which each running estimate moves per sample */
+   for (c=0;c<channels;c++)
+   {
+      for (i=0;i<len;i++)
+      {
+         opus_val32 x, tmp, y;
+         x = in[channels*i+c];
+         /* First stage: subtract the running estimate, then move it toward x */
+         tmp = x-hp_mem[2*c];
+         hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]);
+         /* Second stage: same operation applied to the first stage's output */
+         y = tmp - hp_mem[2*c+1];
+         hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]);
+         out[channels*i+c] = y;
+      }
+   }
+}
+#endif
+
 static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
         int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
 {
@@ -472,6 +525,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
     opus_int32 max_rate;
     int curr_bandwidth;
     opus_int32 max_data_bytes;
+    int extra_buffer, total_buffer;
+    int perform_analysis=0;
+#ifndef FIXED_POINT
+    AnalysisInfo analysis_info;
+#endif
     VARDECL(opus_val16, tmp_prefill);
 
     ALLOC_STACK;
@@ -493,11 +551,20 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
     silk_enc = (char*)st+st->silk_enc_offset;
     celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
 
+#ifndef FIXED_POINT
+    perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;
+#endif
     if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
        delay_compensation = 0;
     else
        delay_compensation = st->delay_compensation;
-
+    if (perform_analysis)
+    {
+       total_buffer = IMAX(st->Fs/200, delay_compensation);
+    } else {
+       total_buffer = delay_compensation;
+    }
+    extra_buffer = total_buffer-delay_compensation;
     st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
 
     frame_rate = st->Fs/frame_size;
@@ -829,9 +896,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
 
     ec_enc_init(&enc, data, max_data_bytes-1);
 
-    ALLOC(pcm_buf, (delay_compensation+frame_size)*st->channels, opus_val16);
-    for (i=0;i<delay_compensation*st->channels;i++)
-       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-delay_compensation)*st->channels+i];
+    ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
+    for (i=0;i<total_buffer*st->channels;i++)
+       pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
 
     if (st->mode == MODE_CELT_ONLY)
        hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -846,12 +913,26 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
 
     if (st->application == OPUS_APPLICATION_VOIP)
     {
-       hp_cutoff(pcm, cutoff_Hz, &pcm_buf[delay_compensation*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+       hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
     } else {
-       for (i=0;i<frame_size*st->channels;i++)
-          pcm_buf[delay_compensation*st->channels + i] = pcm[i];
+       dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
     }
 
+#ifndef FIXED_POINT
+    if (perform_analysis)
+    {
+       int nb_analysis_frames;
+       nb_analysis_frames = frame_size/(st->Fs/100);
+       for (i=0;i<nb_analysis_frames;i++)
+          tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels);
+       if (st->signal_type == OPUS_AUTO)
+          st->voice_ratio = floor(.5+100*(1-analysis_info.music_prob));
+    } else {
+       analysis_info.valid = 0;
+       st->voice_ratio = -1;
+    }
+#endif
+
     /* SILK processing */
     if (st->mode != MODE_CELT_ONLY)
     {
@@ -957,10 +1038,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
         }
 
 #ifdef FIXED_POINT
-        pcm_silk = pcm_buf+delay_compensation*st->channels;
+        pcm_silk = pcm_buf+total_buffer*st->channels;
 #else
         for (i=0;i<frame_size*st->channels;i++)
-            pcm_silk[i] = FLOAT2INT16(pcm_buf[delay_compensation*st->channels + i]);
+            pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
 #endif
         ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
         if( ret ) {
@@ -1061,13 +1142,13 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
     if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
     {
        for (i=0;i<st->channels*st->Fs/400;i++)
-          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels + i];
+          tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
     }
 
-    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+delay_compensation));i++)
+    for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
         st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
     for (;i<st->encoder_buffer*st->channels;i++)
-        st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i];
+        st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
 
 
     if (st->mode != MODE_HYBRID || st->stream_channels==1)
@@ -1088,7 +1169,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
             g1 *= (1.f/16384);
             g2 *= (1.f/16384);
 #endif
-            stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+            stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,
                   frame_size, st->channels, celt_mode->window, st->Fs);
             st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
         }
@@ -1140,7 +1221,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
         int err;
         celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
         celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
-        err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+        err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
         if (err < 0)
         {
            RESTORE_STACK;
@@ -1166,7 +1247,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
         /* If false, we already busted the budget and we'll end up with a "PLC packet" */
         if (ec_tell(&enc) <= 8*nb_compr_bytes)
         {
-           ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
+#ifndef FIXED_POINT
+           if (perform_analysis)
+              celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+#endif
+           ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);
            if (ret < 0)
            {
               RESTORE_STACK;
@@ -1189,9 +1274,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
         celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
 
         /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
-        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
+        celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);
 
-        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+        err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
         if (err < 0)
         {
            RESTORE_STACK;
diff --git a/src/tansig_table.h b/src/tansig_table.h
new file mode 100644
index 00000000..a5aba230
--- /dev/null
+++ b/src/tansig_table.h
@@ -0,0 +1,45 @@
+/* This file is auto-generated by gen_tables. Entry i of tansig_table matches tanh(0.04*i) to six decimals, for i = 0..200 (arguments 0 through 8). */
+
+static const opus_val16 tansig_table[201] = {
+0.000000, 0.039979, 0.079830, 0.119427, 0.158649, 
+0.197375, 0.235496, 0.272905, 0.309507, 0.345214, 
+0.379949, 0.413644, 0.446244, 0.477700, 0.507977, 
+0.537050, 0.564900, 0.591519, 0.616909, 0.641077, 
+0.664037, 0.685809, 0.706419, 0.725897, 0.744277, 
+0.761594, 0.777888, 0.793199, 0.807569, 0.821040, 
+0.833655, 0.845456, 0.856485, 0.866784, 0.876393, 
+0.885352, 0.893698, 0.901468, 0.908698, 0.915420, 
+0.921669, 0.927473, 0.932862, 0.937863, 0.942503, 
+0.946806, 0.950795, 0.954492, 0.957917, 0.961090, 
+0.964028, 0.966747, 0.969265, 0.971594, 0.973749, 
+0.975743, 0.977587, 0.979293, 0.980869, 0.982327, 
+0.983675, 0.984921, 0.986072, 0.987136, 0.988119, 
+0.989027, 0.989867, 0.990642, 0.991359, 0.992020, 
+0.992631, 0.993196, 0.993718, 0.994199, 0.994644, 
+0.995055, 0.995434, 0.995784, 0.996108, 0.996407, 
+0.996682, 0.996937, 0.997172, 0.997389, 0.997590, 
+0.997775, 0.997946, 0.998104, 0.998249, 0.998384, 
+0.998508, 0.998623, 0.998728, 0.998826, 0.998916, 
+0.999000, 0.999076, 0.999147, 0.999213, 0.999273, 
+0.999329, 0.999381, 0.999428, 0.999472, 0.999513, 
+0.999550, 0.999585, 0.999617, 0.999646, 0.999673, 
+0.999699, 0.999722, 0.999743, 0.999763, 0.999781, 
+0.999798, 0.999813, 0.999828, 0.999841, 0.999853, 
+0.999865, 0.999875, 0.999885, 0.999893, 0.999902, 
+0.999909, 0.999916, 0.999923, 0.999929, 0.999934, 
+0.999939, 0.999944, 0.999948, 0.999952, 0.999956, 
+0.999959, 0.999962, 0.999965, 0.999968, 0.999970, 
+0.999973, 0.999975, 0.999977, 0.999978, 0.999980, 
+0.999982, 0.999983, 0.999984, 0.999986, 0.999987, 
+0.999988, 0.999989, 0.999990, 0.999990, 0.999991, 
+0.999992, 0.999992, 0.999993, 0.999994, 0.999994, 
+0.999994, 0.999995, 0.999995, 0.999996, 0.999996, 
+0.999996, 0.999997, 0.999997, 0.999997, 0.999997, 
+0.999997, 0.999998, 0.999998, 0.999998, 0.999998, 
+0.999998, 0.999998, 0.999999, 0.999999, 0.999999, 
+0.999999, 0.999999, 0.999999, 0.999999, 0.999999, 
+0.999999, 0.999999, 0.999999, 0.999999, 0.999999, 
+1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 
+1.000000, 1.000000, 1.000000, 1.000000, 1.000000, 
+1.000000, 
+};