summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2012-10-09 03:07:06 -0400
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2012-10-09 03:07:06 -0400
commit 7315b35e13a3a7c504ed6b1fe2d28ad500eb2701 (patch)
tree c6ed1c6869b13c5e2514c3ff7cfda4ce350f3fc8
parent ca82894ef16bbd74839cb93e35486e5a3b90426d (diff)
parent 317ffc203efc63333fc3b6a42fdb2887321a4325 (diff)
download opus-7315b35e13a3a7c504ed6b1fe2d28ad500eb2701.tar.gz
Merge branch 'exp_analysis7'
Conflicts: celt/celt.c celt/mdct.c include/opus_defines.h src/opus_encoder.c
-rw-r--r-- Makefile.am 1
-rw-r--r-- celt/bands.c 15
-rw-r--r-- celt/bands.h 2
-rw-r--r-- celt/celt.c 682
-rw-r--r-- celt/celt.h 24
-rw-r--r-- celt/mathops.h 48
-rw-r--r-- celt/mdct.c 6
-rw-r--r-- celt/pitch.c 10
-rw-r--r-- celt/quant_bands.c 8
-rw-r--r-- celt/quant_bands.h 6
-rw-r--r-- opus_headers.mk 5
-rw-r--r-- opus_sources.mk 5
-rw-r--r-- src/analysis.c 460
-rw-r--r-- src/analysis.h 60
-rw-r--r-- src/mlp.c 109
-rw-r--r-- src/mlp.h 41
-rw-r--r-- src/mlp_data.c 73
-rw-r--r-- src/mlp_train.c 496
-rw-r--r-- src/mlp_train.h 86
-rw-r--r-- src/opus_encoder.c 123
-rw-r--r-- src/tansig_table.h 45
21 files changed, 2070 insertions, 235 deletions
diff --git a/Makefile.am b/Makefile.am
index db37d998..07876626 100644
--- a/Makefile.am
+++ b/Makefile.am
@@ -14,6 +14,7 @@ if FIXED_POINT
SILK_SOURCES += $(SILK_SOURCES_FIXED)
else
SILK_SOURCES += $(SILK_SOURCES_FLOAT)
+OPUS_SOURCES += $(OPUS_SOURCES_FLOAT)
endif
include celt_headers.mk
diff --git a/celt/bands.c b/celt/bands.c
index f38b6626..531d3118 100644
--- a/celt/bands.c
+++ b/celt/bands.c
@@ -41,6 +41,21 @@
#include "mathops.h"
#include "rate.h"
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev)
+{ /* Map val to a decision index in [0, N], with hysteresis around the previous decision.
+   *   val        - value being classified
+   *   thresholds - boundaries, scanned from index 0 upward (presumably sorted ascending; verify against callers)
+   *   hysteresis - per-boundary margin, indexed the same way as thresholds[]
+   *   N          - number of entries scanned in thresholds[]
+   *   prev       - index returned by the previous decision; it is not range-checked here,
+   *                so it is assumed to lie in [0, N] (TODO confirm against callers)
+   * Returns the new index; it stays at prev unless val has crossed the boundary next to
+   * prev by at least that boundary's margin. */
+ int i;
+ for (i=0;i<N;i++) /* raw decision: first i with val < thresholds[i], or N if there is none */
+ {
+ if (val < thresholds[i])
+ break;
+ }
+ if (i>prev && val < thresholds[prev]+hysteresis[prev]) /* would move up: hold prev until val reaches thresholds[prev]+hysteresis[prev] */
+ i=prev;
+ if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1]) /* would move down: hold prev until val drops to thresholds[prev-1]-hysteresis[prev-1] */
+ i=prev;
+ return i;
+}
+
opus_uint32 celt_lcg_rand(opus_uint32 seed)
{
return 1664525 * seed + 1013904223;
diff --git a/celt/bands.h b/celt/bands.h
index 9ff8ffd7..47d15b6d 100644
--- a/celt/bands.h
+++ b/celt/bands.h
@@ -92,4 +92,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas
opus_uint32 celt_lcg_rand(opus_uint32 seed);
+int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev);
+
#endif /* BANDS_H */
diff --git a/celt/celt.c b/celt/celt.c
index 28c228d0..7580fa2d 100644
--- a/celt/celt.c
+++ b/celt/celt.c
@@ -178,6 +178,7 @@ struct OpusCustomEncoder {
int prefilter_tapset_old;
#endif
int consec_transient;
+ AnalysisInfo analysis;
opus_val32 preemph_memE[2];
opus_val32 preemph_memD[2];
@@ -187,6 +188,9 @@ struct OpusCustomEncoder {
opus_int32 vbr_drift;
opus_int32 vbr_offset;
opus_int32 vbr_count;
+ opus_val16 overlap_max;
+ opus_val16 stereo_saving;
+ int intensity;
#ifdef RESYNTH
celt_sig syn_mem[2][2*MAX_PERIOD];
@@ -303,92 +307,128 @@ static inline opus_val16 SIG2WORD16(celt_sig x)
}
static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C,
- int overlap)
+ int overlap, opus_val16 *tf_estimate, int *tf_chan, AnalysisInfo *analysis)
{
int i;
VARDECL(opus_val16, tmp);
- opus_val32 mem0=0,mem1=0;
+ opus_val32 mem0,mem1;
int is_transient = 0;
int block;
- int N;
+ int c, N;
+ opus_val16 maxbin;
+ int tf_max;
VARDECL(opus_val16, bins);
+ opus_val16 T1, T2, T3, T4, T5;
+ opus_val16 follower;
+ int metric=0;
+ int fmetric=0, bmetric=0;
+ int count1, count2, count3, count4, count5;;
+
SAVE_STACK;
ALLOC(tmp, len, opus_val16);
- block = overlap/2;
- N=len/block;
+ block = overlap/4;
+ N=len/block-1;
ALLOC(bins, N, opus_val16);
- if (C==1)
+
+ tf_max = 0;
+ for (c=0;c<C;c++)
{
+ mem0=0;
+ mem1=0;
for (i=0;i<len;i++)
- tmp[i] = SHR32(in[i],SIG_SHIFT);
- } else {
- for (i=0;i<len;i++)
- tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1);
- }
+ tmp[i] = SHR32(in[i+c*len],SIG_SHIFT);
- /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
- for (i=0;i<len;i++)
- {
- opus_val32 x,y;
- x = tmp[i];
- y = ADD32(mem0, x);
+ /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */
+ for (i=0;i<len;i++)
+ {
+ opus_val32 x,y;
+ x = tmp[i];
+ y = ADD32(mem0, x);
#ifdef FIXED_POINT
- mem0 = mem1 + y - SHL32(x,1);
- mem1 = x - SHR32(y,1);
+ mem0 = mem1 + y - SHL32(x,1);
+ mem1 = x - SHR32(y,1);
#else
- mem0 = mem1 + y - 2*x;
- mem1 = x - .5f*y;
+ mem0 = mem1 + y - 2*x;
+ mem1 = x - .5f*y;
#endif
- tmp[i] = EXTRACT16(SHR32(y,2));
- }
- /* First few samples are bad because we don't propagate the memory */
- for (i=0;i<12;i++)
- tmp[i] = 0;
+ tmp[i] = EXTRACT16(SHR(y,2));
+ }
+ /* First few samples are bad because we don't propagate the memory */
+ for (i=0;i<12;i++)
+ tmp[i] = 0;
- for (i=0;i<N;i++)
- {
- int j;
- opus_val16 max_abs=0;
- for (j=0;j<block;j++)
- max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
- bins[i] = max_abs;
- }
- for (i=0;i<N;i++)
- {
- int j;
- int conseq=0;
- opus_val16 t1, t2, t3;
-
- t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
- t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]);
- t3 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]);
- for (j=0;j<i;j++)
- {
- if (bins[j] < t1)
- conseq++;
- if (bins[j] < t2)
- conseq++;
- else
- conseq = 0;
+ maxbin=0;
+ for (i=0;i<N;i++)
+ {
+ int j;
+ opus_val16 max_abs=0;
+ for (j=0;j<2*block;j++)
+ max_abs = MAX16(max_abs, ABS16(tmp[i*block+j]));
+ bins[i] = max_abs;
+ maxbin = MAX16(maxbin, bins[i]);
}
- if (conseq>=3)
+
+ T1 = QCONST16(.09f, 15);
+ T2 = QCONST16(.12f, 15);
+ T3 = QCONST16(.18f, 15);
+ T4 = QCONST16(.28f, 15);
+ T5 = QCONST16(.4f, 15);
+
+ follower = 0;
+ count1=count2=count3=count4=count5=0;
+ for (i=0;i<N;i++)
+ {
+ follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
+ if (bins[i] < MULT16_16_Q15(T1, follower))
+ count1++;
+ if (bins[i] < MULT16_16_Q15(T2, follower))
+ count2++;
+ if (bins[i] < MULT16_16_Q15(T3, follower))
+ count3++;
+ if (bins[i] < MULT16_16_Q15(T4, follower))
+ count4++;
+ if (bins[i] < MULT16_16_Q15(T5, follower))
+ count5++;
+ }
+ fmetric = (5*count1 + 4*count2 + 3*count3 + 2*count4 + count5)/2;
+ follower=0;
+ count1=count2=count3=count4=count5=0;
+ for (i=N-1;i>=0;i--)
+ {
+ follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower));
+ if (bins[i] < MULT16_16_Q15(T1, follower))
+ count1++;
+ if (bins[i] < MULT16_16_Q15(T2, follower))
+ count2++;
+ if (bins[i] < MULT16_16_Q15(T3, follower))
+ count3++;
+ if (bins[i] < MULT16_16_Q15(T4, follower))
+ count4++;
+ if (bins[i] < MULT16_16_Q15(T5, follower))
+ count5++;
+ }
+ bmetric = 5*count1 + 4*count2 + 3*count3 + 2*count4 + count5;
+ metric = fmetric+bmetric;
+
+ /*if (metric>40)*/
+ if (metric>20+50*MAX16(analysis->tonality, analysis->noisiness))
is_transient=1;
- conseq = 0;
- for (j=i+1;j<N;j++)
+
+ if (metric>tf_max)
{
- if (bins[j] < t3)
- conseq++;
- else
- conseq = 0;
+ *tf_chan = c;
+ tf_max = metric;
}
- if (conseq>=7)
- is_transient=1;
}
+ /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */
+ *tf_estimate = QCONST16(1.f, 14) + celt_sqrt(MAX16(0, SHL32(MULT16_16(QCONST16(0.0069,14),IMIN(163,tf_max)),14)-QCONST32(0.139,28)));
+
RESTORE_STACK;
#ifdef FUZZING
is_transient = rand()&0x1;
#endif
+ /*printf("%d %f %f %f %f\n", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/
return is_transient;
}
@@ -545,34 +585,22 @@ static const signed char tf_select_table[4][8] = {
{0, -2, 0, -3, 3, 0, 1,-1},
};
-static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width)
+static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias)
{
- int i, j;
- static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)};
+ int i;
opus_val32 L1;
- opus_val16 bias;
- L1=0;
- for (i=0;i<1<<LM;i++)
- {
- opus_val32 L2 = 0;
- for (j=0;j<N>>LM;j++)
- L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]);
- L1 += celt_sqrt(L2);
- }
- L1 = MULT16_32_Q15(sqrtM_1[LM], L1);
- if (width==1)
- bias = QCONST16(.12f,15)*LM;
- else if (width==2)
- bias = QCONST16(.05f,15)*LM;
- else
- bias = QCONST16(.02f,15)*LM;
- L1 = MAC16_32_Q15(L1, bias, L1);
+ L1 = 0;
+ for (i=0;i<N;i++)
+ L1 += EXTEND32(ABS16(tmp[i]));
+ /* When in doubt, prefer good freq resolution */
+ L1 = MAC16_32_Q15(L1, LM*bias, L1);
return L1;
+
}
static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM,
- int *tf_sum)
+ int *tf_sum, opus_val16 tf_estimate, int tf_chan)
{
int i;
VARDECL(int, metric);
@@ -581,9 +609,16 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
VARDECL(int, path0);
VARDECL(int, path1);
VARDECL(celt_norm, tmp);
+ VARDECL(celt_norm, tmp_1);
int lambda;
+ int sel;
+ int selcost[2];
int tf_select=0;
+ opus_val16 bias;
+
SAVE_STACK;
+ bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(1.5f,14)-tf_estimate));
+ /*printf("%f ", bias);*/
if (nbCompressedBytes<15*C)
{
@@ -600,9 +635,10 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
lambda = 4;
else
lambda = 3;
-
+ lambda*=2;
ALLOC(metric, len, int);
ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
+ ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm);
ALLOC(path0, len, int);
ALLOC(path1, len, int);
@@ -610,19 +646,35 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
for (i=0;i<len;i++)
{
int j, k, N;
+ int narrow;
opus_val32 L1, best_L1;
int best_level=0;
N = (m->eBands[i+1]-m->eBands[i])<<LM;
+ /* band is too narrow to be split down to LM=-1 */
+ narrow = (m->eBands[i+1]-m->eBands[i])==1;
for (j=0;j<N;j++)
- tmp[j] = X[j+(m->eBands[i]<<LM)];
+ tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)];
/* Just add the right channel if we're in stereo */
- if (C==2)
+ /*if (C==2)
for (j=0;j<N;j++)
- tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));
- L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM);
+ tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/
+ L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias);
best_L1 = L1;
+ /* Check the -1 case for transients */
+ if (isTransient && !narrow)
+ {
+ for (j=0;j<N;j++)
+ tmp_1[j] = tmp[j];
+ haar1(tmp_1, N>>LM, 1<<LM);
+ L1 = l1_metric(tmp_1, N, LM+1, bias);
+ if (L1<best_L1)
+ {
+ best_L1 = L1;
+ best_level = -1;
+ }
+ }
/*printf ("%f ", L1);*/
- for (k=0;k<LM;k++)
+ for (k=0;k<LM+!(isTransient||narrow);k++)
{
int B;
@@ -631,12 +683,9 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
else
B = k+1;
- if (isTransient)
- haar1(tmp, N>>(LM-k), 1<<(LM-k));
- else
- haar1(tmp, N>>k, 1<<k);
+ haar1(tmp, N>>k, 1<<k);
- L1 = l1_metric(tmp, N, B, N>>LM);
+ L1 = l1_metric(tmp, N, B, bias);
if (L1 < best_L1)
{
@@ -645,17 +694,40 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
}
}
/*printf ("%d ", isTransient ? LM-best_level : best_level);*/
+ /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */
if (isTransient)
- metric[i] = best_level;
+ metric[i] = 2*best_level;
else
- metric[i] = -best_level;
- *tf_sum += metric[i];
+ metric[i] = -2*best_level;
+ *tf_sum += (isTransient ? LM : 0) - metric[i]/2;
+ /* For bands that can't be split to -1, set the metric to the half-way point to avoid
+ biasing the decision */
+ if (narrow && (metric[i]==0 || metric[i]==-2*LM))
+ metric[i]-=1;
+ /*printf("%d ", metric[i]);*/
}
/*printf("\n");*/
- /* NOTE: Future optimized implementations could detect extreme transients and set
- tf_select = 1 but so far we have not found a reliable way of making this useful */
+ /* Search for the optimal tf resolution, including tf_select */
tf_select = 0;
-
+ for (sel=0;sel<2;sel++)
+ {
+ cost0 = 0;
+ cost1 = isTransient ? 0 : lambda;
+ for (i=1;i<len;i++)
+ {
+ int curr0, curr1;
+ curr0 = IMIN(cost0, cost1 + lambda);
+ curr1 = IMIN(cost0 + lambda, cost1);
+ cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]);
+ cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]);
+ }
+ cost0 = IMIN(cost0, cost1);
+ selcost[sel]=cost0;
+ }
+ /* For now, we're conservative and only allow tf_select=1 for transients.
+ * If tests confirm it's useful for non-transients, we could allow it. */
+ if (selcost[1]<selcost[0] && isTransient)
+ tf_select=1;
cost0 = 0;
cost1 = isTransient ? 0 : lambda;
/* Viterbi forward pass */
@@ -685,8 +757,8 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
curr1 = from1;
path1[i]= 1;
}
- cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]);
- cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]);
+ cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]);
+ cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]);
}
tf_res[len-1] = cost0 < cost1 ? 0 : 1;
/* Viterbi backward pass to check the decisions */
@@ -697,6 +769,7 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient,
else
tf_res[i] = path0[i+1];
}
+ /*printf("%d %f\n", *tf_sum, tf_estimate);*/
RESTORE_STACK;
#ifdef FUZZING
tf_select = rand()&0x1;
@@ -744,7 +817,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM,
tf_select = 0;
for (i=start;i<end;i++)
tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]];
- /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/
+ /*for(i=0;i<end;i++)printf("%d ", isTransient ? tf_res[i] : LM+tf_res[i]);printf("\n");*/
}
static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec)
@@ -798,15 +871,20 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C)
}
static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
- const opus_val16 *bandLogE, int end, int LM, int C, int N0)
+ const opus_val16 *bandLogE, int end, int LM, int C, int N0,
+ AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate,
+ int intensity)
{
int i;
opus_val32 diff=0;
int c;
int trim_index = 5;
+ opus_val16 trim = QCONST16(5.f, 8);
+ opus_val16 logXC, logXC2;
if (C==2)
{
opus_val16 sum = 0; /* Q10 */
+ opus_val16 minXC; /* Q10 */
/* Compute inter-channel correlation for low frequencies */
for (i=0;i<8;i++)
{
@@ -817,6 +895,15 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
sum = ADD16(sum, EXTRACT16(SHR32(partial, 18)));
}
sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum);
+ minXC = sum;
+ for (i=8;i<intensity;i++)
+ {
+ int j;
+ opus_val32 partial = 0;
+ for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++)
+ partial = MAC16_16(partial, X[j], X[N0+j]);
+ minXC = MIN16(minXC, EXTRACT16(SHR32(partial, 18)));
+ }
/*printf ("%f\n", sum);*/
if (sum > QCONST16(.995f,10))
trim_index-=4;
@@ -826,18 +913,28 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
trim_index-=2;
else if (sum > QCONST16(.8f,10))
trim_index-=1;
+ /* mid-side savings estimations based on the LF average*/
+ logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum));
+ /* mid-side savings estimations based on min correlation */
+ logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC)));
+#ifdef FIXED_POINT
+ /* Compensate for Q20 vs Q14 input and convert output to Q8 */
+ logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+ logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8);
+#endif
+
+ trim += MAX16(-QCONST16(4.f, 8), MULT16_16_Q15(QCONST16(.75f,15),logXC));
+ *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2));
}
/* Estimate spectral tilt */
c=0; do {
for (i=0;i<end-1;i++)
{
- diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands);
+ diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end);
}
} while (++c<C);
- /* We divide by two here to avoid making the tilt larger for stereo as a
- result of a bug in the loop above */
- diff /= 2*C*(end-1);
+ diff /= C*(end-1);
/*printf("%f\n", diff);*/
if (diff > QCONST16(2.f, DB_SHIFT))
trim_index--;
@@ -847,11 +944,25 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X,
trim_index++;
if (diff < -QCONST16(10.f, DB_SHIFT))
trim_index++;
+ trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 ));
+ trim -= 2*SHR16(tf_estimate-QCONST16(1.f,14), 14-8);
+#ifndef FIXED_POINT
+ if (analysis->valid)
+ {
+ trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05)));
+ }
+#endif
+#ifdef FIXED_POINT
+ trim_index = PSHR32(trim, 8);
+#else
+ trim_index = floor(.5+trim);
+#endif
if (trim_index<0)
trim_index = 0;
if (trim_index>10)
trim_index = 10;
+ /*printf("%d\n", trim_index);*/
#ifdef FUZZING
trim_index = rand()%11;
#endif
@@ -900,6 +1011,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
VARDECL(celt_norm, X);
VARDECL(celt_ener, bandE);
VARDECL(opus_val16, bandLogE);
+ VARDECL(opus_val16, bandLogE2);
VARDECL(int, fine_quant);
VARDECL(opus_val16, error);
VARDECL(int, pulses);
@@ -923,7 +1035,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int alloc_trim;
int pitch_index=COMBFILTER_MINPERIOD;
opus_val16 gain1 = 0;
- int intensity=0;
int dual_stereo=0;
int effectiveBytes;
opus_val16 pf_threshold;
@@ -938,8 +1049,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
int anti_collapse_rsv;
int anti_collapse_on=0;
int silence=0;
+ int tf_chan = 0;
+ opus_val16 tf_estimate;
+ int pitch_change=0;
+ opus_int32 tot_boost=0;
+ opus_val16 sample_max;
+ opus_val16 maxDepth;
ALLOC_STACK;
+ tf_estimate = QCONST16(1.0f,14);
if (nbCompressedBytes<2 || pcm==NULL)
return OPUS_BAD_ARG;
@@ -1054,6 +1172,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(in, CC*(N+st->overlap), celt_sig);
+ sample_max=MAX16(st->overlap_max, celt_maxabs16(pcm, C*(N-st->mode->overlap)));
+ st->overlap_max=celt_maxabs16(pcm+C*(N-st->mode->overlap), C*st->mode->overlap);
+ sample_max=MAX16(sample_max, st->overlap_max);
/* Find pitch period and gain */
{
VARDECL(celt_sig, _pre);
@@ -1093,13 +1214,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
*inp = tmp + st->preemph_memE[c];
st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp)
- MULT16_32_Q15(st->mode->preemph[0], tmp);
- silence = silence && *inp == 0;
inp++;
}
OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD);
OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N);
} while (++c<CC);
+#ifdef FIXED_POINT
+ silence = (sample_max==0);
+#else
+ silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth));
+#endif
#ifdef FUZZING
if ((rand()&0x3F)==0)
silence = 1;
@@ -1129,8 +1254,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16);
pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC);
+ /* Don't search the last 1.5 octaves of the range because
+ there are too many false positives due to short-term correlation */
pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N,
- COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index);
+ COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index);
pitch_index = COMBFILTER_MAXPERIOD-pitch_index;
gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD,
@@ -1138,6 +1265,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (pitch_index > COMBFILTER_MAXPERIOD-2)
pitch_index = COMBFILTER_MAXPERIOD-2;
gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1);
+ if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3
+ && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period))
+ pitch_change = 1;
+ /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/
if (st->loss_rate>2)
gain1 = HALF32(gain1);
if (st->loss_rate>4)
@@ -1236,7 +1367,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (st->complexity > 1)
{
isTransient = transient_analysis(in, N+st->overlap, CC,
- st->overlap);
+ st->overlap, &tf_estimate, &tf_chan, &st->analysis);
if (isTransient)
shortBlocks = M;
}
@@ -1253,6 +1384,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
{
for (i=0;i<N;i++)
freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i]));
+ tf_chan = 0;
}
if (st->upsample != 1)
{
@@ -1265,17 +1397,53 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
freq[c*N+i] = 0;
} while (++c<C);
}
- ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
-
compute_band_energies(st->mode, freq, bandE, effEnd, C, M);
amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C);
+ /*for (i=0;i<21;i++)
+ printf("%f ", bandLogE[i]);
+ printf("\n");*/
+
+ ALLOC(bandLogE2, C*st->mode->nbEBands, opus_val16);
+ if (shortBlocks && st->complexity>=8)
+ {
+ VARDECL(celt_sig, freq2);
+ VARDECL(opus_val32, bandE2);
+ ALLOC(freq2, CC*N, celt_sig);
+ compute_mdcts(st->mode, 0, in, freq2, CC, LM);
+ if (CC==2&&C==1)
+ {
+ for (i=0;i<N;i++)
+ freq2[i] = ADD32(HALF32(freq2[i]), HALF32(freq2[N+i]));
+ }
+ if (st->upsample != 1)
+ {
+ c=0; do
+ {
+ int bound = N/st->upsample;
+ for (i=0;i<bound;i++)
+ freq2[c*N+i] *= st->upsample;
+ for (;i<N;i++)
+ freq2[c*N+i] = 0;
+ } while (++c<C);
+ }
+ ALLOC(bandE2, C*st->mode->nbEBands, opus_val32);
+ compute_band_energies(st->mode, freq2, bandE2, effEnd, C, M);
+ amp2Log2(st->mode, effEnd, st->end, bandE2, bandLogE2, C);
+ for (i=0;i<C*st->mode->nbEBands;i++)
+ bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT));
+ } else {
+ for (i=0;i<C*st->mode->nbEBands;i++)
+ bandLogE2[i] = bandLogE[i];
+ }
+
+ ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */
/* Band normalisation */
normalise_bands(st->mode, freq, X, bandE, effEnd, C, M);
ALLOC(tf_res, st->mode->nbEBands, int);
- tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum);
+ tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum, tf_estimate, tf_chan);
for (i=effEnd;i<st->end;i++)
tf_res[i] = tf_res[effEnd-1];
@@ -1287,7 +1455,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc);
- st->spread_decision = SPREAD_NORMAL;
if (ec_tell(enc)+4<=total_bits)
{
if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C)
@@ -1295,9 +1462,21 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
if (st->complexity == 0)
st->spread_decision = SPREAD_NONE;
} else {
- st->spread_decision = spreading_decision(st->mode, X,
- &st->tonal_average, st->spread_decision, &st->hf_average,
- &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+ if (st->analysis.valid)
+ {
+ static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)};
+ static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)};
+ static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)};
+ static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)};
+ st->spread_decision = hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision);
+ st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision);
+ } else {
+ st->spread_decision = spreading_decision(st->mode, X,
+ &st->tonal_average, st->spread_decision, &st->hf_average,
+ &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M);
+ }
+ /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/
+ /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/
}
ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5);
}
@@ -1309,38 +1488,95 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
for (i=0;i<st->mode->nbEBands;i++)
offsets[i] = 0;
/* Dynamic allocation code */
+ maxDepth=-QCONST16(32.f, DB_SHIFT);
/* Make sure that dynamic allocation can't make us bust the budget */
if (effectiveBytes > 50 && LM>=1)
{
- int t1, t2;
- if (LM <= 1)
+ int last=0;
+ VARDECL(opus_val16, follower);
+ ALLOC(follower, C*st->mode->nbEBands, opus_val16);
+ c=0;do
+ {
+ follower[c*st->mode->nbEBands] = bandLogE2[c*st->mode->nbEBands];
+ for (i=1;i<st->end;i++)
+ {
+ /* The last band to be at least 3 dB higher than the previous one
+ is the last we'll consider. Otherwise, we run into problems on
+ bandlimited signals. */
+ if (bandLogE2[c*st->mode->nbEBands+i] > bandLogE2[c*st->mode->nbEBands+i-1]+QCONST16(.5f,DB_SHIFT))
+ last=i;
+ follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]);
+ }
+ for (i=last-1;i>=0;i--)
+ follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i], MIN16(follower[c*st->mode->nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]));
+ for (i=0;i<st->end;i++)
+ {
+ opus_val16 noise_floor;
+ /* Noise floor must take into account eMeans, the depth, the width of the bands
+ and the preemphasis filter (approx. square of bark band ID) */
+ noise_floor = MULT16_16(QCONST16(0.0625f, DB_SHIFT),st->mode->logN[i])
+ +QCONST16(.5f,DB_SHIFT)+SHL16(9-st->lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6)
+ +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5));
+ follower[c*st->mode->nbEBands+i] = MAX16(follower[c*st->mode->nbEBands+i], noise_floor);
+ maxDepth = MAX16(maxDepth, bandLogE[c*st->mode->nbEBands+i]-noise_floor);
+ }
+ } while (++c<C);
+ if (C==2)
{
- t1 = 3;
- t2 = 5;
+ for (i=st->start;i<st->end;i++)
+ {
+ /* Consider 24 dB "cross-talk" */
+ follower[st->mode->nbEBands+i] = MAX16(follower[st->mode->nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT));
+ follower[ i] = MAX16(follower[ i], follower[st->mode->nbEBands+i]-QCONST16(4.f,DB_SHIFT));
+ follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[st->mode->nbEBands+i]-follower[st->mode->nbEBands+i]));
+ }
} else {
- t1 = 2;
- t2 = 4;
+ for (i=st->start;i<st->end;i++)
+ {
+ follower[i] = MAX16(0, bandLogE[i]-follower[i]);
+ }
}
- for (i=st->start+1;i<st->end-1;i++)
+ /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */
+ if ((!st->vbr || st->constrained_vbr)&&!isTransient)
{
- opus_val32 d2;
- d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1];
- if (C==2)
- d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]-
- bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]);
-#ifdef FUZZING
- if((rand()&0xF)==0)
+ for (i=st->start;i<st->end;i++)
+ follower[i] = HALF16(follower[i]);
+ }
+ for (i=st->start;i<st->end;i++)
+ {
+ int width;
+ int boost;
+ int boost_bits;
+
+ if (i<8)
+ follower[i] *= 2;
+ if (i>=12)
+ follower[i] = HALF16(follower[i]);
+ follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT));
+
+ /* FIXME: Adaptively reduce follower at low rate or for cbr/cvbr */
+ width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM;
+ if (width<6)
{
- offsets[i] += 1;
- if((rand()&0x3)==0)
- offsets[i] += 1+(rand()&0x3);
+ boost = SHR32(EXTEND32(follower[i]),DB_SHIFT);
+ boost_bits = boost*width<<BITRES;
+ } else if (width > 48) {
+ boost = SHR32(EXTEND32(follower[i])*8,DB_SHIFT);
+ boost_bits = (boost*width<<BITRES)/8;
+ } else {
+ boost = SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT);
+ boost_bits = boost*6<<BITRES;
+ }
+ /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */
+ if ((!st->vbr || (st->constrained_vbr&&!isTransient))
+ && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4)
+ {
+ offsets[i] = 0;
+ break;
+ } else {
+ offsets[i] = boost;
+ tot_boost += boost_bits;
}
-#else
- if (d2 > SHL16(t1,DB_SHIFT))
- offsets[i] += 1;
- if (d2 > SHL16(t2,DB_SHIFT))
- offsets[i] += 1;
-#endif
}
}
dynalloc_logp = 6;
@@ -1377,11 +1613,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
dynalloc_logp = IMAX(2, dynalloc_logp-1);
offsets[i] = boost;
}
+
+ if (C==2)
+ {
+ int effectiveRate;
+
+ static const opus_val16 intensity_thresholds[21]=
+ /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/
+ { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130};
+ static const opus_val16 intensity_histeresis[21]=
+ { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 8, 12};
+
+ /* Always use MS for 2.5 ms frames until we can do a better analysis */
+ if (LM!=0)
+ dual_stereo = stereo_analysis(st->mode, X, LM, N);
+
+ /* Account for coarse energy */
+ effectiveRate = (8*effectiveBytes - 80)>>LM;
+
+ /* effectiveRate in kb/s */
+ effectiveRate = 2*effectiveRate/5;
+
+ st->intensity = hysteresis_decision(effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity);
+ st->intensity = IMIN(st->end,IMAX(st->start, st->intensity));
+ }
+
alloc_trim = 5;
if (tell+(6<<BITRES) <= total_bits - total_boost)
{
alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE,
- st->end, LM, C, N);
+ st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity);
ec_enc_icdf(enc, alloc_trim, trim_icdf, 7);
tell = ec_tell_frac(enc);
}
@@ -1392,28 +1653,96 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
opus_val16 alpha;
opus_int32 delta;
/* The target rate in 8th bits per frame */
- opus_int32 target;
+ opus_int32 target, base_target;
opus_int32 min_allowed;
+ int coded_bins;
+ int coded_bands;
int lm_diff = st->mode->maxLM - LM;
+ coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands;
+ coded_bins = st->mode->eBands[coded_bands]<<LM;
+ if (C==2)
+ coded_bins += st->mode->eBands[IMIN(st->intensity, coded_bands)]<<LM;
/* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms.
The CELT allocator will just not be able to use more than that anyway. */
nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM));
- target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES);
+ target = vbr_rate - ((40*C+20)<<BITRES);
+ base_target = target;
+
+ if (st->constrained_vbr)
+ target += (st->vbr_offset>>lm_diff);
+
+ /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/
+#ifndef FIXED_POINT
+ if (st->analysis.valid && st->analysis.activity<.4)
+ target -= (coded_bins<<BITRES)*1*(.4-st->analysis.activity);
+#endif
+ /* Stereo savings */
+ if (C==2)
+ {
+ int coded_stereo_bands;
+ int coded_stereo_dof;
+ coded_stereo_bands = IMIN(st->intensity, coded_bands);
+ coded_stereo_dof = (st->mode->eBands[coded_stereo_bands]<<LM)-coded_stereo_bands;
+ /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/
+ target -= MIN32(target/3, SHR16(MULT16_16(st->stereo_saving,(coded_stereo_dof<<BITRES)),8));
+ target += MULT16_16_Q15(QCONST16(0.035,15),coded_stereo_dof<<BITRES);
+ }
+ /* Limits starving of other bands when using dynalloc */
+ target += tot_boost;
+ /* Compensates for the average transient boost */
+ target = MULT16_32_Q15(QCONST16(0.96f,15),target);
+ /* Apply transient boost */
+ target = SHL32(MULT16_32_Q15(tf_estimate, target),1);
+
+#ifndef FIXED_POINT
+ /* Apply tonality boost */
+ if (st->analysis.valid) {
+ int tonal_target;
+ float tonal;
+
+ /* Compensates for the average tonality boost */
+ target -= MULT16_16_Q15(QCONST16(0.13f,15),coded_bins<<BITRES);
+
+ tonal = MAX16(0,st->analysis.tonality-.2);
+ tonal_target = target + (coded_bins<<BITRES)*2.0f*tonal;
+ if (pitch_change)
+ tonal_target += (coded_bins<<BITRES)*.8;
+ /*printf("%f %f ", st->analysis.tonality, tonal);*/
+ target = IMAX(tonal_target,target);
+ }
+#endif
- /* Shortblocks get a large boost in bitrate, but since they
- are uncommon long blocks are not greatly affected */
- if (shortBlocks || tf_sum < -2*(st->end-st->start))
- target = 7*target/4;
- else if (tf_sum < -(st->end-st->start))
- target = 3*target/2;
- else if (M > 1)
- target-=(target+14)/28;
+ {
+ opus_int32 floor_depth;
+ int bins;
+ bins = st->mode->eBands[st->mode->nbEBands-2]<<LM;
+ /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/
+ floor_depth = SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT);
+ floor_depth = IMAX(floor_depth, target>>2);
+ target = IMIN(target, floor_depth);
+ /*printf("%f %d\n", maxDepth, floor_depth);*/
+ }
+
+ if (st->constrained_vbr || st->bitrate<64000)
+ {
+ opus_val16 rate_factor;
+#ifdef FIXED_POINT
+ rate_factor = MAX16(0,(st->bitrate-32000));
+#else
+ rate_factor = MAX16(0,(1.f/32768)*(st->bitrate-32000));
+#endif
+ if (st->constrained_vbr)
+ rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15));
+ target = base_target + MULT16_32_Q15(rate_factor, target-base_target);
+
+ }
+ /* Don't allow more than doubling the rate */
+ target = IMIN(2*base_target, target);
/* The current offset is removed from the target and the space used
so far is added*/
target=target+tell;
-
/* In VBR mode the frame size must not be reduced so much that it would
result in the encoder running out of bits.
The margin of 2 bytes ensures that none of the bust-prevention logic
@@ -1453,8 +1782,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/*printf ("%d\n", st->vbr_reservoir);*/
/* Compute the offset we need to apply in order to reach the target */
- st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
- st->vbr_offset = -st->vbr_drift;
+ if (st->constrained_vbr)
+ {
+ st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift);
+ st->vbr_offset = -st->vbr_drift;
+ }
/*printf ("%d\n", st->vbr_drift);*/
if (st->constrained_vbr && st->vbr_reservoir < 0)
@@ -1467,38 +1799,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/*printf ("+%d\n", adjust);*/
}
nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes);
+ /*printf("%d\n", nbCompressedBytes*50*8);*/
/* This moves the raw bits to take into account the new compressed size */
ec_enc_shrink(enc, nbCompressedBytes);
}
- if (C==2)
- {
- int effectiveRate;
-
- /* Always use MS for 2.5 ms frames until we can do a better analysis */
- if (LM!=0)
- dual_stereo = stereo_analysis(st->mode, X, LM, N);
-
- /* Account for coarse energy */
- effectiveRate = (8*effectiveBytes - 80)>>LM;
-
- /* effectiveRate in kb/s */
- effectiveRate = 2*effectiveRate/5;
- if (effectiveRate<35)
- intensity = 8;
- else if (effectiveRate<50)
- intensity = 12;
- else if (effectiveRate<68)
- intensity = 16;
- else if (effectiveRate<84)
- intensity = 18;
- else if (effectiveRate<102)
- intensity = 19;
- else if (effectiveRate<130)
- intensity = 20;
- else
- intensity = 100;
- intensity = IMIN(st->end,IMAX(st->start, intensity));
- }
/* Bit allocation */
ALLOC(fine_quant, st->mode->nbEBands, int);
@@ -1510,7 +1814,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? (1<<BITRES) : 0;
bits -= anti_collapse_rsv;
codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap,
- alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses,
+ alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses,
fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands);
st->lastCodedBands = codedBands;
@@ -1530,7 +1834,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm,
/* Residual quantisation */
ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char);
quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks,
- bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res,
+ bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res,
nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng);
if (anti_collapse_rsv > 0)
@@ -1882,6 +2186,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...)
st->signalling = value;
}
break;
+ case CELT_SET_ANALYSIS_REQUEST:
+ {
+ AnalysisInfo *info = va_arg(ap, AnalysisInfo *);
+ if (info)
+ OPUS_COPY(&st->analysis, info, 1);
+ }
+ break;
case CELT_GET_MODE_REQUEST:
{
const CELTMode ** value = va_arg(ap, const CELTMode**);
@@ -2869,6 +3180,7 @@ const char *opus_strerror(int error)
const char *opus_get_version_string(void)
{
return "libopus " OPUS_VERSION
+ "-exp_analysis"
#ifdef FIXED_POINT
"-fixed"
#endif
diff --git a/celt/celt.h b/celt/celt.h
index 218cd883..4c04ddba 100644
--- a/celt/celt.h
+++ b/celt/celt.h
@@ -50,7 +50,20 @@ extern "C" {
#define CELTDecoder OpusCustomDecoder
#define CELTMode OpusCustomMode
-#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+/* Per-frame results of the signal analysis, handed to the CELT encoder
+   through CELT_SET_ANALYSIS. Filled in by tonality_analysis() in
+   src/analysis.c. */
+typedef struct {
+ /* Set to 1 once the fields below have been computed; the encoder checks it
+    before applying the tonality boost. */
+ int valid;
+ /* Overall frame tonality estimate. */
+ opus_val16 tonality;
+ /* Band-index-weighted sum of the per-band tonality (weight b-8), scaled by 1/64. */
+ opus_val16 tonality_slope;
+ /* Frame noisiness averaged over the analysis bands. */
+ opus_val16 noisiness;
+ /* Activity estimate combining noisiness and relative band energy. */
+ opus_val16 activity;
+ /* Indices of the two most tonal candidate analysis bands (0 if none). */
+ int boost_band[2];
+ /* Boost associated with each entry of boost_band. */
+ opus_val16 boost_amount[2];
+ /* Smoothed probability that the signal is music (set to 0 in fixed-point builds). */
+ opus_val16 music_prob;
+}AnalysisInfo;
+
+#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr)))
+
+#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr)))
/* Encoder/decoder Requests */
@@ -81,11 +94,18 @@ extern "C" {
#define CELT_GET_MODE_REQUEST 10015
/** Get the CELTMode used by an encoder or decoder */
-#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x)
+#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x)
#define CELT_SET_SIGNALLING_REQUEST 10016
#define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_REQUEST 10018
+#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x)
+#define CELT_SET_TONALITY_SLOPE_REQUEST 10020
+#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x)
+
+#define CELT_SET_ANALYSIS_REQUEST 10022
+#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x)
/* Encoder stuff */
diff --git a/celt/mathops.h b/celt/mathops.h
index 4e977956..3c7486ad 100644
--- a/celt/mathops.h
+++ b/celt/mathops.h
@@ -43,6 +43,33 @@
unsigned isqrt32(opus_uint32 _val);
+#ifndef OVERRIDE_CELT_MAXABS16
+/* Returns the largest absolute value among the first len entries of x
+   (0 when len is not positive). */
+static inline opus_val16 celt_maxabs16(const opus_val16 *x, int len)
+{
+   opus_val16 peak = 0;
+   int n = 0;
+   while (n<len)
+   {
+      peak = MAX16(peak, ABS16(x[n]));
+      n++;
+   }
+   return peak;
+}
+#endif
+
+#ifndef OVERRIDE_CELT_MAXABS32
+#ifdef FIXED_POINT
+/* 32-bit counterpart of celt_maxabs16(): largest absolute value among the
+   first len entries of x (0 when len is not positive). */
+static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len)
+{
+   opus_val32 peak = 0;
+   int n = 0;
+   while (n<len)
+   {
+      peak = MAX32(peak, ABS32(x[n]));
+      n++;
+   }
+   return peak;
+}
+#else
+#define celt_maxabs32(x,len) celt_maxabs16(x,len)
+#endif
+#endif
+
+
#ifndef FIXED_POINT
#define PI 3.141592653f
@@ -117,27 +144,6 @@ static inline opus_int16 celt_ilog2(opus_int32 x)
}
#endif
-#ifndef OVERRIDE_CELT_MAXABS16
-static inline opus_val16 celt_maxabs16(opus_val16 *x, int len)
-{
- int i;
- opus_val16 maxval = 0;
- for (i=0;i<len;i++)
- maxval = MAX16(maxval, ABS16(x[i]));
- return maxval;
-}
-#endif
-
-#ifndef OVERRIDE_CELT_MAXABS32
-static inline opus_val32 celt_maxabs32(opus_val32 *x, int len)
-{
- int i;
- opus_val32 maxval = 0;
- for (i=0;i<len;i++)
- maxval = MAX32(maxval, ABS32(x[i]));
- return maxval;
-}
-#endif
/** Integer log in base2. Defined for zero, but not for negative numbers */
static inline opus_int16 celt_zlog2(opus_val32 x)
diff --git a/celt/mdct.c b/celt/mdct.c
index 16a36c69..b300b2b5 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -109,12 +109,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
int N, N2, N4;
kiss_twiddle_scalar sine;
VARDECL(kiss_fft_scalar, f);
+ VARDECL(kiss_fft_scalar, f2);
SAVE_STACK;
N = l->n;
N >>= shift;
N2 = N>>1;
N4 = N>>2;
ALLOC(f, N2, kiss_fft_scalar);
+ ALLOC(f2, N2, kiss_fft_scalar);
/* sin(x) ~= x here */
#ifdef FIXED_POINT
sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N;
@@ -180,12 +182,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar
}
/* N/4 complex FFT, down-scales by 4/N */
- opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in);
+ opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2);
/* Post-rotate */
{
/* Temp pointers to make it really clear to the compiler what we're doing */
- const kiss_fft_scalar * OPUS_RESTRICT fp = in;
+ const kiss_fft_scalar * OPUS_RESTRICT fp = f2;
kiss_fft_scalar * OPUS_RESTRICT yp1 = out;
kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1);
const kiss_twiddle_scalar *t = &l->trig[0];
diff --git a/celt/pitch.c b/celt/pitch.c
index c2f08ec1..d9bba1b2 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int T1, T1b;
opus_val16 g1;
opus_val16 cont=0;
+ opus_val16 thresh;
T1 = (2*T0+k)/(2*k);
if (T1 < minperiod)
break;
@@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
cont = HALF32(prev_gain);
else
cont = 0;
- if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont)
+ thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont);
+ /* Bias against very high pitch (very short period) to avoid false-positives
+    due to short-term correlation. The tighter bound has to be tested first:
+    T1<2*minperiod implies T1<3*minperiod, so the opposite order would never
+    reach the strictest threshold. */
+ if (T1<2*minperiod)
+    thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont);
+ else if (T1<3*minperiod)
+    thresh = MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont);
+ if (g1 > thresh)
{
best_xy = xy;
best_yy = yy;
diff --git a/celt/quant_bands.c b/celt/quant_bands.c
index b1d4eb15..241392f1 100644
--- a/celt/quant_bands.c
+++ b/celt/quant_bands.c
@@ -40,8 +40,8 @@
#include "rate.h"
#ifdef FIXED_POINT
-/* Mean energy in each band quantized in Q6 */
-static const signed char eMeans[25] = {
+/* Mean energy in each band quantized in Q4 */
+const signed char eMeans[25] = {
103,100, 92, 85, 81,
77, 72, 70, 78, 75,
73, 71, 78, 74, 69,
@@ -49,8 +49,8 @@ static const signed char eMeans[25] = {
60, 60, 60, 60, 60
};
#else
-/* Mean energy in each band quantized in Q6 and converted back to float */
-static const opus_val16 eMeans[25] = {
+/* Mean energy in each band quantized in Q4 and converted back to float */
+const opus_val16 eMeans[25] = {
6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f,
4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f,
4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f,
diff --git a/celt/quant_bands.h b/celt/quant_bands.h
index bec2855c..b3187fad 100644
--- a/celt/quant_bands.h
+++ b/celt/quant_bands.h
@@ -35,6 +35,12 @@
#include "entdec.h"
#include "mathops.h"
+#ifdef FIXED_POINT
+extern const signed char eMeans[25];
+#else
+extern const opus_val16 eMeans[25];
+#endif
+
void amp2Log2(const CELTMode *m, int effEnd, int end,
celt_ener *bandE, opus_val16 *bandLogE, int C);
diff --git a/opus_headers.mk b/opus_headers.mk
index f160710c..43a978cd 100644
--- a/opus_headers.mk
+++ b/opus_headers.mk
@@ -1,4 +1,7 @@
OPUS_HEAD = \
include/opus.h \
include/opus_multistream.h \
-src/opus_private.h
+src/opus_private.h \
+src/analysis.h \
+src/mlp.h \
+src/tansig_table.h
diff --git a/opus_sources.mk b/opus_sources.mk
index 384b036a..81eaef06 100644
--- a/opus_sources.mk
+++ b/opus_sources.mk
@@ -3,3 +3,8 @@ src/opus_decoder.c \
src/opus_encoder.c \
src/opus_multistream.c \
src/repacketizer.c
+
+OPUS_SOURCES_FLOAT = \
+src/analysis.c \
+src/mlp.c \
+src/mlp_data.c
diff --git a/src/analysis.c b/src/analysis.c
new file mode 100644
index 00000000..fde65493
--- /dev/null
+++ b/src/analysis.c
@@ -0,0 +1,460 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include "kiss_fft.h"
+#include "celt.h"
+#include "modes.h"
+#include "arch.h"
+#include "quant_bands.h"
+#include <stdio.h>
+#include "analysis.h"
+#include "mlp.h"
+
+extern const MLP net;
+
+#ifndef M_PI
+#define M_PI 3.141592653
+#endif
+
+static const float dct_table[128] = {
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000,
+ 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654,
+ -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851,
+ 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760,
+ -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760,
+ 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631,
+ 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641,
+ 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664,
+ -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806,
+ 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969,
+ -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969,
+ 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292,
+ 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300,
+};
+
+static const float analysis_window[240] = {
+ 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f,
+ 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f,
+ 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 0.020590f, 0.022490f, 0.024472f,
+ 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f,
+ 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f,
+ 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f,
+ 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f,
+ 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f,
+ 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f,
+ 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f,
+ 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f,
+ 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f,
+ 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f,
+ 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f,
+ 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f,
+ 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f,
+ 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f,
+ 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f,
+ 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f,
+ 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f,
+ 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f,
+ 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f,
+ 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f,
+ 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f,
+ 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 0.929703f, 0.933013f,
+ 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f,
+ 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f,
+ 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f,
+ 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f,
+ 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f,
+};
+
+/* Edges of the analysis bands, in FFT bins: band b covers bins
+   tbands[b] .. tbands[b+1]-1. According to the FIXME at the end of
+   tonality_analysis(), the last band ends at 12 kHz. */
+static const int tbands[NB_TBANDS+1] = {
+ 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120
+};
+
+/*static const float tweight[NB_TBANDS+1] = {
+ .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5
+};*/
+
+#define NB_TONAL_SKIP_BANDS 9
+
+#define cA 0.43157974f
+#define cB 0.67848403f
+#define cC 0.08595542f
+#define cE (M_PI/2)
+/* Rational approximation of atan2(y, x) built from the cA/cB/cC
+   coefficients, with cE (pi/2) providing the quadrant offsets. */
+static inline float fast_atan2f(float y, float x) {
+   float xsq, ysq, den;
+   /* Scale up tiny inputs so that the squares below do not underflow */
+   if (ABS16(x)+ABS16(y)<1e-9)
+   {
+      x*=1e12;
+      y*=1e12;
+   }
+   xsq = x*x;
+   ysq = y*y;
+   if (!(xsq<ysq))
+   {
+      /* |x| >= |y| */
+      den = (xsq + cB*ysq) * (xsq + cC*ysq);
+      if (den==0)
+         return copysignf(cE,y) - copysignf(cE,x*y);
+      return x*y*(xsq + cA*ysq) / den + copysignf(cE,y) - copysignf(cE,x*y);
+   }
+   /* |x| < |y| */
+   den = (ysq + cB*xsq) * (ysq + cC*xsq);
+   if (den==0)
+      return copysignf(cE,y);
+   return -x*y*(ysq + cA*xsq) / den + copysignf(cE,y);
+}
+
+/* Updates the tonality/music analysis with one new block of audio.
+
+   tonal    : persistent analysis state, updated in place.
+   info     : receives the per-frame results (tonality, tonality_slope,
+              activity, noisiness, boost_band/boost_amount, music_prob);
+              info->valid is set to 1 on return.
+   celt_enc : only used to fetch the mode, whose FFT state does the transform.
+   x        : input samples; indices 0..719 are read per channel (interleaved
+              when C!=1, in which case the two channels are summed).
+   C        : channel count; any value other than 1 takes the two-channel path.
+
+   Two overlapping 480-sample windows (x[0..479] and x[240..719]) are packed
+   as the real and imaginary parts of a single complex FFT. The phase of each
+   bin is then tracked across windows: a small wrapped second-order phase
+   difference marks the bin as tonal. Per-band energies, tonality and
+   stationarity are accumulated, turned into 25 features and (float builds
+   only) fed to the MLP to update the music probability. */
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C)
+{
+   int i, b;
+   const CELTMode *mode;
+   const kiss_fft_state *kfft;
+   kiss_fft_cpx in[480], out[480];
+   int N = 480, N2=240;
+   float * restrict A = tonal->angle;
+   float * restrict dA = tonal->d_angle;
+   float * restrict d2A = tonal->d2_angle;
+   float tonality[240];
+   float noisiness[240];
+   float band_tonality[NB_TBANDS];
+   float logE[NB_TBANDS];
+   float BFCC[8];
+   float features[100];
+   float frame_tonality;
+   float max_frame_tonality;
+   float tw_sum=0;
+   float frame_noisiness;
+   const float pi4 = M_PI*M_PI*M_PI*M_PI;
+   float slope=0;
+   float frame_stationarity;
+   float relativeE;
+   float frame_prob;
+   float alpha, alphaE, alphaE2;
+   float frame_loudness;
+   float bandwidth_mask;
+   int bandwidth=0;
+   float bandE[NB_TBANDS];
+   celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode));
+
+   tonal->last_transition++;
+   /* Averaging coefficients: plain running means until enough frames have
+      been seen, fixed-length leaky averages afterwards */
+   alpha = 1.f/IMIN(20, 1+tonal->count);
+   alphaE = 1.f/IMIN(50, 1+tonal->count);
+   alphaE2 = 1.f/IMIN(6000, 1+tonal->count);
+
+   if (tonal->count<4)
+      tonal->music_prob = .5;
+   kfft = mode->mdct.kfft[0];
+   /* Window the input: first window in the real part, second window (240
+      samples later) in the imaginary part. analysis_window holds the rising
+      half of a symmetric window. */
+   if (C==1)
+   {
+      for (i=0;i<N2;i++)
+      {
+         float w = analysis_window[i];
+         in[i].r = MULT16_16(w, x[i]);
+         in[i].i = MULT16_16(w, x[N-N2+i]);
+         in[N-i-1].r = MULT16_16(w, x[N-i-1]);
+         in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]);
+      }
+   } else {
+      for (i=0;i<N2;i++)
+      {
+         float w = analysis_window[i];
+         in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]);
+         in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]);
+         in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]);
+         in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]);
+      }
+   }
+   opus_fft(kfft, in, out);
+
+   for (i=1;i<N2;i++)
+   {
+      float X1r, X2r, X1i, X2i;
+      float angle, d_angle, d2_angle;
+      float angle2, d_angle2, d2_angle2;
+      float mod1, mod2, avg_mod;
+      /* Separate the spectra of the two real windows packed in the FFT */
+      X1r = out[i].r+out[N-i].r;
+      X1i = out[i].i-out[N-i].i;
+      X2r = out[i].i+out[N-i].i;
+      X2i = out[N-i].r-out[i].r;
+
+      /* Phases are expressed in turns (1.0 == 2*pi) */
+      angle = (.5/M_PI)*fast_atan2f(X1i, X1r);
+      d_angle = angle - A[i];
+      d2_angle = d_angle - dA[i];
+
+      angle2 = (.5/M_PI)*fast_atan2f(X2i, X2r);
+      d_angle2 = angle2 - angle;
+      d2_angle2 = d_angle2 - d_angle;
+
+      /* Wrap the second-order differences to [-.5, .5] and raise to the 4th power */
+      mod1 = d2_angle - floor(.5+d2_angle);
+      noisiness[i] = fabs(mod1);
+      mod1 *= mod1;
+      mod1 *= mod1;
+
+      mod2 = d2_angle2 - floor(.5+d2_angle2);
+      noisiness[i] += fabs(mod2);
+      mod2 *= mod2;
+      mod2 *= mod2;
+
+      avg_mod = .25*(d2A[i]+2*mod1+mod2);
+      tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015;
+
+      A[i] = angle2;
+      dA[i] = d_angle2;
+      d2A[i] = mod2;
+   }
+
+   frame_tonality = 0;
+   max_frame_tonality = 0;
+   tw_sum = 0;
+   info->activity = 0;
+   frame_noisiness = 0;
+   frame_stationarity = 0;
+   if (!tonal->count)
+   {
+      for (b=0;b<NB_TBANDS;b++)
+      {
+         tonal->lowE[b] = 1e10;
+         tonal->highE[b] = -1e10;
+      }
+   }
+   relativeE = 0;
+   info->boost_amount[0]=info->boost_amount[1]=0;
+   info->boost_band[0]=info->boost_band[1]=0;
+   frame_loudness = 0;
+   bandwidth_mask = 0;
+   for (b=0;b<NB_TBANDS;b++)
+   {
+      float E=0, tE=0, nE=0;
+      float L1, L2;
+      float stationarity;
+      for (i=tbands[b];i<tbands[b+1];i++)
+      {
+         float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r
+                    + out[i].i*out[i].i + out[N-i].i*out[N-i].i;
+         E += binE;
+         tE += binE*tonality[i];
+         nE += binE*2*(.5-noisiness[i]);
+      }
+      bandE[b] = E;
+      tonal->E[tonal->E_count][b] = E;
+      frame_noisiness += nE/(1e-15+E);
+
+      frame_loudness += sqrt(E+1e-10);
+      /* Add a reasonable noise floor */
+      tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E;
+      tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E);
+      /* 13 dB slope for spreading function */
+      bandwidth_mask = MAX32(.05*bandwidth_mask, E);
+      /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */
+      if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask)
+         bandwidth = b;
+      logE[b] = log(E+1e-10);
+      /* Slowly-moving min/max trackers of the log energy, kept at least 1 apart */
+      tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01);
+      tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1);
+      if (tonal->highE[b] < tonal->lowE[b]+1)
+      {
+         tonal->highE[b]+=.5;
+         tonal->lowE[b]-=.5;
+      }
+      relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]);
+
+      /* Stationarity over the stored energy history of this band */
+      L1=L2=0;
+      for (i=0;i<NB_FRAMES;i++)
+      {
+         L1 += sqrt(tonal->E[i][b]);
+         L2 += tonal->E[i][b];
+      }
+
+      stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2));
+      stationarity *= stationarity;
+      stationarity *= stationarity;
+      frame_stationarity += stationarity;
+      /*band_tonality[b] = tE/(1e-15+E)*/;
+      band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]);
+#if 0
+      if (b>=NB_TONAL_SKIP_BANDS)
+      {
+         frame_tonality += tweight[b]*band_tonality[b];
+         tw_sum += tweight[b];
+      }
+#else
+      /* Sliding sum over the last NB_TBANDS-NB_TONAL_SKIP_BANDS bands */
+      frame_tonality += band_tonality[b];
+      if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS)
+         frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS];
+#endif
+      max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality);
+      slope += band_tonality[b]*(b-8);
+      /*printf("%f %f ", band_tonality[b], stationarity);*/
+      /* Keep track of the two most tonal bands among bands 7..NB_TBANDS-2 */
+      if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1)
+      {
+         if (band_tonality[b] > info->boost_amount[0])
+         {
+            info->boost_amount[1] = info->boost_amount[0];
+            info->boost_band[1] = info->boost_band[0];
+            info->boost_amount[0] = band_tonality[b];
+            info->boost_band[0] = b;
+         } else {
+            info->boost_amount[1] = band_tonality[b];
+            info->boost_band[1] = b;
+         }
+      }
+      tonal->prev_band_tonality[b] = band_tonality[b];
+   }
+
+   frame_loudness = 20*log10(frame_loudness);
+   tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness);
+   tonal->lowECount *= (1-alphaE);
+   if (frame_loudness < tonal->Etracker-30)
+      tonal->lowECount += alphaE;
+
+   /* 8 cepstral-like coefficients: DCT of the first 16 band log-energies */
+   for (i=0;i<8;i++)
+   {
+      float sum=0;
+      for (b=0;b<16;b++)
+         sum += dct_table[i*16+b]*logE[b];
+      BFCC[i] = sum;
+   }
+
+   frame_stationarity /= NB_TBANDS;
+   relativeE /= NB_TBANDS;
+   if (tonal->count<10)
+      relativeE = .5;
+   frame_noisiness /= NB_TBANDS;
+#if 1
+   info->activity = frame_noisiness + (1-frame_noisiness)*relativeE;
+#else
+   info->activity = .5*(1+frame_noisiness-frame_stationarity);
+#endif
+   frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS));
+   frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8);
+   tonal->prev_tonality = frame_tonality;
+   info->boost_amount[0] -= frame_tonality+.2;
+   info->boost_amount[1] -= frame_tonality+.2;
+   /* Only boost a band that clearly stands out from both neighbours.
+      Candidates are restricted to bands >= 7, so an index of 0 means that no
+      candidate was found: cancel the boost without looking at the neighbours,
+      which would otherwise read band_tonality[-1]. */
+   if (info->boost_band[0] < 1
+         || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15
+         || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15)
+      info->boost_amount[0]=0;
+   if (info->boost_band[1] < 1
+         || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15
+         || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15)
+      info->boost_amount[1]=0;
+
+   slope /= 8*8;
+   info->tonality_slope = slope;
+
+   tonal->E_count = (tonal->E_count+1)%NB_FRAMES;
+   tonal->count++;
+   info->tonality = frame_tonality;
+
+   /* Features 0..10: combinations of the current BFCC and the three
+      previous frames stored in tonal->mem */
+   for (i=0;i<4;i++)
+      features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i];
+
+   for (i=0;i<4;i++)
+      tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i];
+
+   for (i=0;i<4;i++)
+      features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]);
+   for (i=0;i<3;i++)
+      features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8];
+
+   if (tonal->count > 5)
+   {
+      for (i=0;i<9;i++)
+         tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i];
+   }
+
+   /* Shift the BFCC history by one frame */
+   for (i=0;i<8;i++)
+   {
+      tonal->mem[i+24] = tonal->mem[i+16];
+      tonal->mem[i+16] = tonal->mem[i+8];
+      tonal->mem[i+8] = tonal->mem[i];
+      tonal->mem[i] = BFCC[i];
+   }
+   for (i=0;i<9;i++)
+      features[11+i] = sqrt(tonal->std[i]);
+   features[20] = info->tonality;
+   features[21] = info->activity;
+   features[22] = frame_stationarity;
+   features[23] = info->tonality_slope;
+   features[24] = tonal->lowECount;
+
+#ifndef FIXED_POINT
+   mlp_process(&net, features, &frame_prob);
+   frame_prob = .5*(frame_prob+1);
+   /* Curve fitting between the MLP probability and the actual probability */
+   frame_prob = .01 + 1.21*frame_prob*frame_prob - .23*pow(frame_prob, 10);
+
+   /*printf("%f\n", frame_prob);*/
+   {
+      float tau, beta;
+      float p0, p1;
+      float max_certainty;
+      /* One transition every 3 minutes */
+      tau = .00005;
+      beta = .1;
+      max_certainty = .01+1.f/(20+.5*tonal->last_transition);
+      /* Two-state update: apply the transition probability, then weight
+         each state by the (flattened) frame likelihood */
+      p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau;
+      p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau;
+      p0 *= pow(1-frame_prob, beta);
+      p1 *= pow(frame_prob, beta);
+      tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1)));
+      info->music_prob = tonal->music_prob;
+      /*printf("%f %f\n", frame_prob, info->music_prob);*/
+   }
+   if (tonal->last_music != (tonal->music_prob>.5))
+      tonal->last_transition=0;
+   tonal->last_music = tonal->music_prob>.5;
+#else
+   info->music_prob = 0;
+#endif
+   /*for (i=0;i<25;i++)
+      printf("%f ", features[i]);
+   printf("\n");*/
+
+   /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */
+   if (bandwidth == NB_TBANDS-1 || tonal->count<100)
+   {
+      tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND;
+   } else {
+      int close_enough = 0;
+      /* bandwidth>0 keeps the bandE[bandwidth-1] read in bounds; the flag
+         is only consulted for bandwidth 12 and 16 below */
+      if (bandwidth > 0 && bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1)
+         close_enough=1;
+      if (bandwidth<=11 || (bandwidth==12 && close_enough))
+         tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND;
+      else if (bandwidth<=13)
+         tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND;
+      else if (bandwidth<=15 || (bandwidth==16 && close_enough))
+         tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND;
+      /* NOTE(review): bandwidth==16 without close_enough leaves
+         opus_bandwidth at its previous value - confirm this is intended */
+   }
+   info->noisiness = frame_noisiness;
+   info->valid = 1;
+}
diff --git a/src/analysis.h b/src/analysis.h
new file mode 100644
index 00000000..09d1036a
--- /dev/null
+++ b/src/analysis.h
@@ -0,0 +1,60 @@
+/* Copyright (c) 2011 Xiph.Org Foundation
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef ANALYSIS_H
+#define ANALYSIS_H
+
+#define NB_FRAMES 8
+#define NB_TBANDS 18
+
+
+/* Persistent state of tonality_analysis(), carried from one call to the next. */
+typedef struct {
+ /* Per-bin phase tracking: last phase (in turns), last phase difference, and
+    4th power of the last wrapped second-order phase difference. */
+ float angle[240];
+ float d_angle[240];
+ float d2_angle[240];
+ /* Band tonality computed by the previous call. */
+ float prev_band_tonality[NB_TBANDS];
+ /* Frame tonality computed by the previous call. */
+ float prev_tonality;
+ /* Circular history of band energies; E_count is the row written next. */
+ float E[NB_FRAMES][NB_TBANDS];
+ /* Slowly-moving minimum/maximum trackers of the log band energy. */
+ float lowE[NB_TBANDS], highE[NB_TBANDS];
+ /* Running means of the band energy and of its square root. */
+ float meanE[NB_TBANDS], meanRE[NB_TBANDS];
+ /* BFCC of the last four frames, 8 values per frame, most recent first. */
+ float mem[32];
+ /* Running mean of the BFCC (tonality_analysis() updates the first 4 entries). */
+ float cmean[8];
+ /* Running mean of the squared features 0..8. */
+ float std[9];
+ /* Smoothed probability that the signal is music. */
+ float music_prob;
+ /* Decaying peak tracker of the frame loudness (dB). */
+ float Etracker;
+ /* Smoothed rate of frames whose loudness is more than 30 dB below Etracker. */
+ float lowECount;
+ int E_count;
+ /* Whether music_prob was above .5 after the previous call. */
+ int last_music;
+ /* Number of calls since last_music last changed. */
+ int last_transition;
+ /* Number of frames analysed so far. */
+ int count;
+ /* Detected audio bandwidth, as an OPUS_BANDWIDTH_* value. */
+ int opus_bandwidth;
+} TonalityAnalysisState;
+
+void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info,
+ CELTEncoder *celt_enc, const opus_val16 *x, int C);
+
+#endif
diff --git a/src/mlp.c b/src/mlp.c
new file mode 100644
index 00000000..dd3690db
--- /dev/null
+++ b/src/mlp.c
@@ -0,0 +1,109 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include <math.h>
+#include "mlp.h"
+#include "arch.h"
+#include "tansig_table.h"
+#define MAX_NEURONS 100
+
+#ifdef FIXED_POINT
+extern const opus_val16 tansig_table[501];
+/* Fixed-point sigmoid approximation: takes a Q19 input and returns a Q14
+   value, saturating at +/-1.0 once |_x| reaches 10. In between, it looks up
+   the nearest table entry (table step of .04) and refines it with a
+   correction built from dy = 1 - y*y. The commented-out lines show the
+   floating-point equivalent of each step.
+   NOTE(review): tansig_table is presumably a sampled tanh; this variant
+   indexes it as a 501-entry table centred at 250, whereas the float variant
+   indexes the table from tansig_table.h starting at 0 - confirm that both
+   layouts exist before enabling this path. */
+static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */
+{
+ int i;
+ opus_val16 xx; /* Q11 */
+ /*double x, y;*/
+ opus_val16 dy, yy; /* Q14 */
+ /*x = 1.9073e-06*_x;*/
+ if (_x>=QCONST32(10,19))
+ return QCONST32(1.,14);
+ if (_x<=-QCONST32(10,19))
+ return -QCONST32(1.,14);
+ /* Q19 -> Q11 */
+ xx = EXTRACT16(SHR32(_x, 8));
+ /*i = lrint(25*x);*/
+ i = SHR32(ADD32(1024,MULT16_16(25, xx)),11);
+ /*x -= .04*i;*/
+ xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8));
+ /*x = xx*(1./2048);*/
+ /*y = tansig_table[250+i];*/
+ yy = tansig_table[250+i];
+ /*y = yy*(1./16384);*/
+ dy = 16384-MULT16_16_Q14(yy,yy);
+ yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx)));
+ return yy;
+}
+#else
+/*extern const float tansig_table[501];*/
+/* Table-driven sigmoid approximation: odd-symmetric, saturating at +/-1 once
+   |x| reaches 8. Below that, the nearest table entry (step .04) is refined
+   with a correction built from 1 - y*y. */
+static inline double tansig_approx(double x)
+{
+   int idx;
+   double mag, frac, val, slope;
+   if (x>=8)
+      return 1;
+   if (x<=-8)
+      return -1;
+   /* Work on the magnitude; the sign is restored on return */
+   mag = x<0 ? -x : x;
+   idx = lrint(25*mag);
+   frac = mag - .04*idx;
+   val = tansig_table[idx];
+   slope = 1-val*val;
+   val = val + frac*slope*(1 - val*frac);
+   return x<0 ? -val : val;
+}
+#endif
+
+/* Evaluates a network with one hidden layer.
+   m   : network description; topo[0], topo[1] and topo[2] give the number of
+         inputs, hidden neurons and outputs. The weights are read
+         sequentially: for each neuron, one bias followed by one weight per
+         input of that layer, hidden layer first.
+   in  : topo[0] input values.
+   out : receives topo[2] output values. */
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out)
+{
+   int j, k;
+   opus_val16 hidden[MAX_NEURONS];
+   const opus_val16 *W = m->weights;
+   /* Hidden layer */
+   for (j=0;j<m->topo[1];j++)
+   {
+      opus_val32 acc = SHL32(EXTEND32(*W++),8);
+      for (k=0;k<m->topo[0];k++)
+         acc = MAC16_16(acc, in[k],*W++);
+      hidden[j] = tansig_approx(acc);
+   }
+   /* Output layer */
+   for (j=0;j<m->topo[2];j++)
+   {
+      opus_val32 acc = SHL32(EXTEND32(*W++),14);
+      for (k=0;k<m->topo[1];k++)
+         acc = MAC16_16(acc, hidden[k], *W++);
+      out[j] = tansig_approx(EXTRACT16(PSHR32(acc,17)));
+   }
+}
+
diff --git a/src/mlp.h b/src/mlp.h
new file mode 100644
index 00000000..68ff68d8
--- /dev/null
+++ b/src/mlp.h
@@ -0,0 +1,41 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_H_
+#define _MLP_H_
+
+#include "arch.h"
+
+/* Read-only description of a trained perceptron, as consumed by mlp_process(). */
+typedef struct {
+ /* Presumably the number of entries in topo (3 for the network in
+ mlp_data.c); mlp_process() itself does not read this field. */
+ int layers;
+ /* Layer sizes. mlp_process() uses topo[0] as the input count, topo[1] as
+ the hidden-neuron count and topo[2] as the output count. */
+ const int *topo;
+ /* Flat coefficient array read front to back by mlp_process(): for each
+ hidden neuron a bias then topo[0] weights, followed by, for each output
+ neuron, a bias then topo[1] weights. */
+ const opus_val16 *weights;
+} MLP;
+
+/* Runs the network m on in (topo[0] values) and stores topo[2] results in out. */
+void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out);
+
+#endif /* _MLP_H_ */
diff --git a/src/mlp_data.c b/src/mlp_data.c
new file mode 100644
index 00000000..dbc7cec3
--- /dev/null
+++ b/src/mlp_data.c
@@ -0,0 +1,73 @@
+#include "mlp.h"
+
+/* RMS error was 0.179835, seed was 1322103961 */
+
+/* Generated coefficient table for the network below (mlp_train prints this
+ file). With topo = {25, 10, 1} it holds (25+1)*10 hidden-layer values
+ followed by (10+1)*1 output-layer values, 271 in total. Within a layer each
+ neuron contributes its bias first and then one weight per input, which is
+ the order mlp_process() reads them in. */
+static const float weights[271] = {
+
+/* hidden layer */
+1.55597, -0.0739792, -0.0646761, -0.099531, -0.0794943,
+0.0180174, -0.0391354, 0.0508224, -0.0160169, -0.0773263,
+-0.0300002, -0.0865361, 0.124477, -0.28648, -0.0860702,
+-0.518949, -0.0873341, -0.235393, -0.907833, -0.383573,
+0.535388, -0.57944, 0.98116, 0.8482, 1.12426,
+-3.23721, -0.647072, -0.0265139, 0.0711052, -0.00125666,
+-0.0396181, -0.44282, -0.510495, -0.201865, 0.0134336,
+-0.167205, -0.155406, 0.00041678, -0.00468705, -0.0233224,
+0.264279, -0.301375, 0.00234895, 0.0144741, -0.137535,
+0.200323, 0.0192027, 3.19818, 2.03495, 0.705517,
+-4.6025, -0.11485, -0.792716, 0.150714, 0.10608,
+0.240633, 0.0690698, 0.0695297, 0.124819, 0.0501433,
+0.0460952, 0.147639, 0.10327, 0.158007, 0.113714,
+0.0276191, 0.0680749, -0.130012, 0.0796126, 0.133067,
+0.51495, 0.747578, -0.128742, 5.98112, -1.16698,
+-0.276492, -1.73549, -3.90234, 2.01489, -0.040118,
+-0.113002, -0.146751, -0.113569, 0.0534873, 0.0989832,
+0.0872875, 0.049266, 0.0367557, -0.00889148, -0.0648461,
+-0.00190352, 0.0143773, 0.0259364, -0.0592133, -0.0672924,
+0.1399, -0.0987886, -0.347402, 0.101326, -0.0680876,
+0.469186, 0.246922, 10.4017, 3.44846, -0.662725,
+-0.0328208, -0.0561274, -0.0167744, 0.00044282, -0.0457645,
+-0.0408314, -0.013113, -0.0373873, -0.0474122, -0.0273745,
+-0.0308505, 0.000582959, -0.0421135, 0.464859, 0.196842,
+0.320538, 0.0435528, -0.200168, 0.266475, -0.0853727,
+1.20397, 0.711542, -1.04397, -1.47759, 1.26768,
+0.446958, 0.266477, -0.30802, 0.28431, -0.118541,
+0.00836345, 0.0689026, -0.0137996, -0.0395417, 0.26982,
+-0.206255, 0.16066, 0.114757, 0.359587, -0.106503,
+-0.0948534, 0.175358, -0.122966, -0.0056675, 0.483848,
+-0.134916, -0.427567, -0.140172, -1.0866, -2.73921,
+0.549843, 0.17685, 0.0010675, -0.00137386, 0.0884424,
+-0.0698736, -0.00174136, 0.0718775, -0.0396849, 0.0448056,
+0.0577853, -0.0372353, 0.134599, 0.0260656, 0.140322,
+0.22704, -0.020568, -0.0142424, -0.21723, -0.997704,
+-0.884573, -0.163495, 2.33617, 0.224142, 0.19635,
+-0.957387, 0.144678, 1.47035, -0.00700498, -0.0472309,
+-0.0137848, -0.0189145, 0.00856479, 0.0316965, 0.00613373,
+0.00209807, 0.00270964, -0.0490206, 0.0105712, -0.0465045,
+-0.0381532, -0.0985268, -0.108297, 0.0146409, -0.0040718,
+-0.0698572, -0.380568, -0.230479, 3.98917, 0.457652,
+-1.02355, -7.4435, -0.475314, 1.61743, 0.0254017,
+-0.00791293, 0.047217, 0.0220995, -0.0304311, 0.0052168,
+-0.0404054, -0.0230293, 0.00169229, -0.0138178, 0.0043137,
+-0.0598088, -0.133601, 0.0555138, -0.177358, -0.159856,
+-0.137281, 0.108051, -0.305973, 0.393775, 0.0747287,
+0.783993, -0.875086, 1.06862, 0.340519, -0.352681,
+-0.0830912, -0.100017, 0.0729085, -0.00829403, 0.027489,
+-0.0779597, 0.082286, -0.164181, -0.41519, 0.00282335,
+-0.29573, 0.125571, 0.726935, 0.392137, 0.491348,
+0.0723196, -0.0259758, -0.0636332, -0.452384, -0.000225974,
+-2.34001, 2.45211, -0.544628, 5.62944, -3.44507,
+
+/* output layer */
+-3.13835, 0.994751, 0.444901, 1.59518, 1.23665,
+3.37012, -1.34606, 1.99131, 1.33476, 1.3885,
+1.12559, };
+
+/* Layer sizes: 25 inputs, 10 hidden neurons, 1 output. This matches the
+ 271-entry weights array: (25+1)*10 + (10+1)*1 = 271. */
+static const int topo[3] = {25, 10, 1};
+
+/* The trained network exported to the rest of the library; the fields are
+ the layer count, the layer sizes and the coefficient table, in that order. */
+const MLP net = {
+ 3,
+ topo,
+ weights
+};
+
diff --git a/src/mlp_train.c b/src/mlp_train.c
new file mode 100644
index 00000000..6421c17d
--- /dev/null
+++ b/src/mlp_train.c
@@ -0,0 +1,496 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+
+#include "mlp_train.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <semaphore.h>
+#include <pthread.h>
+#include <time.h>
+#include <signal.h>
+
+/* Set to 1 by handler() when a termination signal arrives; the training loop
+   polls it once per epoch. It is written from a signal handler, so it must be
+   a volatile sig_atomic_t rather than a plain int. */
+volatile sig_atomic_t stopped = 0;
+
+/* Signal handler: requests a clean stop of the training loop and re-installs
+   itself for the same signal. */
+void handler(int sig)
+{
+   stopped = 1;
+   signal(sig, handler);
+}
+
+/* Allocates a trainable network and gives it data-dependent initial weights.
+
+   topo       layer sizes (nbLayers entries, copied into the network)
+   nbLayers   number of layers; the training code in this file assumes 3
+   inputs     nbSamples rows of topo[0] values
+   outputs    nbSamples rows of topo[nbLayers-1] values
+
+   Layer i stores topo[i+1] rows of (topo[i]+1) doubles; entry 0 of each row
+   is the bias. First-layer weights are drawn with randn() using a standard
+   deviation of 1/sqrt(inDim*variance) per input (variance floored at .001),
+   and the first-layer biases are chosen so that the mean input produces a
+   zero pre-activation. Last-layer biases start at the mean target value and
+   its weights are drawn with standard deviation 1/sqrt(hidden size).
+   in_rate[0] is 1 and in_rate[1+j] is .5/(.0001 + mean square of input j).
+
+   Allocation failures are not checked. Returns the new network. */
+MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples)
+{
+   int i, j, k;
+   MLPTrain *net;
+   int inDim, outDim;
+   net = malloc(sizeof(*net));
+   /* Previously left uninitialised */
+   net->layers = nbLayers;
+   net->topo = malloc(nbLayers*sizeof(net->topo[0]));
+   for (i=0;i<nbLayers;i++)
+      net->topo[i] = topo[i];
+   inDim = topo[0];
+   outDim = topo[nbLayers-1];
+   net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0]));
+   /* One pointer per layer transition: size the arrays by their element type */
+   net->weights = malloc((nbLayers-1)*sizeof(net->weights[0]));
+   net->best_weights = malloc((nbLayers-1)*sizeof(net->best_weights[0]));
+   for (i=0;i<nbLayers-1;i++)
+   {
+      net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
+      net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0]));
+   }
+   double inMean[inDim];
+   for (j=0;j<inDim;j++)
+   {
+      double std=0;
+      inMean[j] = 0;
+      for (i=0;i<nbSamples;i++)
+      {
+         inMean[j] += inputs[i*inDim+j];
+         std += inputs[i*inDim+j]*inputs[i*inDim+j];
+      }
+      inMean[j] /= nbSamples;
+      std /= nbSamples;
+      /* At this point std still holds the mean square of input j */
+      net->in_rate[1+j] = .5/(.0001+std);
+      /* Turn it into a variance and floor it */
+      std = std-inMean[j]*inMean[j];
+      if (std<.001)
+         std = .001;
+      std = 1/sqrt(inDim*std);
+      for (k=0;k<topo[1];k++)
+         net->weights[0][k*(topo[0]+1)+j+1] = randn(std);
+   }
+   net->in_rate[0] = 1;
+   /* Biases that cancel the response to the mean input */
+   for (j=0;j<topo[1];j++)
+   {
+      double sum = 0;
+      for (k=0;k<inDim;k++)
+         sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1];
+      net->weights[0][j*(topo[0]+1)] = -sum;
+   }
+   for (j=0;j<outDim;j++)
+   {
+      double mean = 0;
+      double std;
+      for (i=0;i<nbSamples;i++)
+         mean += outputs[i*outDim+j];
+      mean /= nbSamples;
+      std = 1/sqrt(topo[nbLayers-2]);
+      net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean;
+      for (k=0;k<topo[nbLayers-2];k++)
+         net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std);
+   }
+   return net;
+}
+
+#define MAX_NEURONS 100
+
+/* Accumulates the weight updates for one batch of samples.
+
+   For every sample the network is evaluated (hidden layer, then output
+   layer, both through tansig_approx()), the error target - output is formed,
+   and the contributions are added to W0_grad (first layer) and W1_grad
+   (second layer), which are zeroed on entry. Output-layer terms are scaled
+   by 1 - output^2; the hidden-layer term is the raw output error propagated
+   through the second-layer weights and scaled by 1 - hidden^2.
+
+   *error_rate receives the number of output errors whose magnitude exceeds 1.
+   Returns the sum of squared errors over the batch (not yet averaged).
+   The per-layer scratch arrays hold MAX_NEURONS entries; the layer sizes are
+   not checked against that bound here. */
+double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate)
+{
+   const int nbIn = net->topo[0];
+   const int nbHid = net->topo[1];
+   const int nbOut = net->topo[2];
+   /* Row lengths of the two weight matrices (bias + one weight per input) */
+   const int hidStride = nbIn+1;
+   const int outStride = nbHid+1;
+   const double *hidW = net->weights[0];
+   const double *outW = net->weights[1];
+   double act[MAX_NEURONS];
+   double pred[MAX_NEURONS];
+   double err[MAX_NEURONS];
+   double sqErr = 0;
+   int n, c, s;
+
+   *error_rate = 0;
+   memset(W0_grad, 0, hidStride*nbHid*sizeof(double));
+   memset(W1_grad, 0, outStride*nbOut*sizeof(double));
+   /* Initial fill only; every entry is overwritten before it is read */
+   for (n=0;n<nbOut;n++)
+      pred[n] = outputs[n];
+   for (s=0;s<nbSamples;s++)
+   {
+      const float *x = inputs+s*nbIn;
+      const float *target = outputs + s*nbOut;
+      /* Forward pass: hidden layer */
+      for (n=0;n<nbHid;n++)
+      {
+         const double *row = hidW + n*hidStride;
+         double z = row[0];
+         for (c=0;c<nbIn;c++)
+            z += row[c+1]*x[c];
+         act[n] = tansig_approx(z);
+      }
+      /* Forward pass: output layer, plus error statistics */
+      for (n=0;n<nbOut;n++)
+      {
+         const double *row = outW + n*outStride;
+         double z = row[0];
+         for (c=0;c<nbHid;c++)
+            z += row[c+1]*act[c];
+         pred[n] = tansig_approx(z);
+         err[n] = target[n] - pred[n];
+         sqErr += err[n]*err[n];
+         *error_rate += fabs(err[n])>1;
+      }
+      /* Second-layer contributions */
+      for (n=0;n<nbOut;n++)
+      {
+         double *g = W1_grad + n*outStride;
+         float slope = 1-pred[n]*pred[n];
+         g[0] += err[n]*slope;
+         for (c=0;c<nbHid;c++)
+            g[c+1] += slope*err[n]*act[c];
+      }
+      /* First-layer contributions */
+      for (n=0;n<nbHid;n++)
+      {
+         double *g = W0_grad + n*hidStride;
+         double delta = 0;
+         for (c=0;c<nbOut;c++)
+            delta += err[c]*outW[c*outStride+n+1];
+         delta *= 1-act[n]*act[n];
+         g[0] += delta;
+         for (c=0;c<nbIn;c++)
+            g[c+1] += delta*x[c];
+      }
+   }
+   return sqErr;
+}
+
+/* Number of worker threads the training set is split across */
+#define NB_THREADS 8
+
+/* Per-worker semaphores: sem_begin[i] is posted by the trainer to start a
+ pass on worker i, sem_end[i] is posted by worker i when its pass is done. */
+sem_t sem_begin[NB_THREADS];
+sem_t sem_end[NB_THREADS];
+
+/* Work description and result slots shared between the trainer and one
+ worker thread (see gradient_thread_process()). */
+struct GradientArg {
+ /* Worker index; selects the entry used in sem_begin[] / sem_end[] */
+ int id;
+ /* When non-zero at wake-up, the worker leaves its loop and returns */
+ int done;
+ /* Network and sample slice handed to compute_gradient() */
+ MLPTrain *net;
+ float *inputs;
+ float *outputs;
+ int nbSamples;
+ /* Set by the worker to its own gradient buffers, which the trainer reads
+ after waiting on sem_end[id] */
+ double *W0_grad;
+ double *W1_grad;
+ /* Return value of compute_gradient() for the last pass */
+ double rms;
+ /* Error count reported by compute_gradient() for the last pass */
+ double error_rate;
+};
+
+/* Worker thread body. _arg points to this worker's struct GradientArg.
+   The thread owns two gradient buffers sized from the network topology and
+   publishes them through arg->W0_grad / arg->W1_grad. It then repeatedly
+   waits on sem_begin[id], runs compute_gradient() on its slice, stores the
+   result in arg->rms / arg->error_rate and posts sem_end[id]. A wake-up
+   with arg->done set ends the loop. Always returns NULL. */
+void *gradient_thread_process(void *_arg)
+{
+   struct GradientArg *job = _arg;
+   const int *dims = job->net->topo;
+   const int hiddenCoefs = (dims[0]+1)*dims[1];
+   const int outputCoefs = (dims[1]+1)*dims[2];
+   /* These buffers live on this thread's stack for as long as it runs */
+   double hiddenGrad[hiddenCoefs];
+   double outputGrad[outputCoefs];
+
+   job->W0_grad = hiddenGrad;
+   job->W1_grad = outputGrad;
+   for (;;)
+   {
+      sem_wait(&sem_begin[job->id]);
+      if (job->done)
+         break;
+      job->rms = compute_gradient(job->net, job->inputs, job->outputs, job->nbSamples, job->W0_grad, job->W1_grad, &job->error_rate);
+      sem_post(&sem_end[job->id]);
+   }
+   fprintf(stderr, "done\n");
+   return NULL;
+}
+
+/* Trains a 3-layer network with batch back-propagation spread over
+   NB_THREADS worker threads.
+
+   net        network from mlp_init(); weights[0]/weights[1] are updated in
+              place and the best-scoring snapshot is kept in best_weights
+   inputs     nbSamples rows of topo[0] values
+   outputs    nbSamples rows of topo[2] values
+   nbEpoch    maximum number of passes over the data
+   rate       base learning rate (divided by nbSamples, and scaled per input
+              by net->in_rate for the first layer)
+
+   Each worker handles nbSamples/NB_THREADS consecutive samples, so up to
+   NB_THREADS-1 trailing samples are not used. Every weight has its own
+   learning rate: it grows by 1% while the gradient keeps its sign, shrinks
+   by 10% when the sign flips, and is floored at 1e-15. After more than 30
+   epochs in a row without improvement the best snapshot is restored with
+   reduced rates; training stops after more than 10 such restarts, when
+   nbEpoch is reached, or when the global `stopped` flag is set.
+
+   Progress is logged to stderr. Returns the best mean squared error seen.
+   NOTE(review): net->weights holds the last updated weights on return, not
+   necessarily the snapshot that achieved the returned error. */
+float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate)
+{
+   int i, j;
+   int e;
+   float best_rms = 1e10;
+   int inDim, outDim, hiddenDim;
+   int *topo;
+   double *W0, *W1, *best_W0, *best_W1;
+   double *W0_old, *W1_old;
+   double *W0_old2, *W1_old2;
+   double *W0_grad, *W1_grad;
+   double *W0_oldgrad, *W1_oldgrad;
+   double *W0_rate, *W1_rate;
+   double *best_W0_rate, *best_W1_rate;
+   int W0_size, W1_size;
+   topo = net->topo;
+   W0_size = (topo[0]+1)*topo[1];
+   W1_size = (topo[1]+1)*topo[2];
+   struct GradientArg args[NB_THREADS];
+   pthread_t thread[NB_THREADS];
+   int samplePerPart = nbSamples/NB_THREADS;
+   int count_worse=0;
+   int count_retries=0;
+
+   inDim = net->topo[0];
+   hiddenDim = net->topo[1];
+   outDim = net->topo[2];
+   W0 = net->weights[0];
+   W1 = net->weights[1];
+   best_W0 = net->best_weights[0];
+   best_W1 = net->best_weights[1];
+   W0_old = malloc(W0_size*sizeof(double));
+   W1_old = malloc(W1_size*sizeof(double));
+   W0_old2 = malloc(W0_size*sizeof(double));
+   W1_old2 = malloc(W1_size*sizeof(double));
+   W0_grad = malloc(W0_size*sizeof(double));
+   W1_grad = malloc(W1_size*sizeof(double));
+   W0_oldgrad = malloc(W0_size*sizeof(double));
+   W1_oldgrad = malloc(W1_size*sizeof(double));
+   W0_rate = malloc(W0_size*sizeof(double));
+   W1_rate = malloc(W1_size*sizeof(double));
+   best_W0_rate = malloc(W0_size*sizeof(double));
+   best_W1_rate = malloc(W1_size*sizeof(double));
+   memcpy(W0_old, W0, W0_size*sizeof(double));
+   memcpy(W0_old2, W0, W0_size*sizeof(double));
+   memset(W0_grad, 0, W0_size*sizeof(double));
+   memset(W0_oldgrad, 0, W0_size*sizeof(double));
+   memcpy(W1_old, W1, W1_size*sizeof(double));
+   memcpy(W1_old2, W1, W1_size*sizeof(double));
+   memset(W1_grad, 0, W1_size*sizeof(double));
+   memset(W1_oldgrad, 0, W1_size*sizeof(double));
+
+   rate /= nbSamples;
+   for (i=0;i<hiddenDim;i++)
+      for (j=0;j<inDim+1;j++)
+         W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j];
+   for (i=0;i<W1_size;i++)
+      W1_rate[i] = rate;
+
+   /* Start the "best" snapshot from the initial state so that the restart
+      path below never reads uninitialised memory, even if no epoch ever
+      improves on best_rms (for instance when the error is NaN). */
+   memcpy(best_W0, W0, W0_size*sizeof(double));
+   memcpy(best_W1, W1, W1_size*sizeof(double));
+   memcpy(best_W0_rate, W0_rate, W0_size*sizeof(double));
+   memcpy(best_W1_rate, W1_rate, W1_size*sizeof(double));
+
+   for (i=0;i<NB_THREADS;i++)
+   {
+      args[i].net = net;
+      args[i].inputs = inputs+i*samplePerPart*inDim;
+      args[i].outputs = outputs+i*samplePerPart*outDim;
+      args[i].nbSamples = samplePerPart;
+      args[i].id = i;
+      args[i].done = 0;
+      sem_init(&sem_begin[i], 0, 0);
+      sem_init(&sem_end[i], 0, 0);
+      pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]);
+   }
+   for (e=0;e<nbEpoch;e++)
+   {
+      double rms=0;
+      double error_rate = 0;
+      /* Kick off one pass on every worker, then gather the partial results */
+      for (i=0;i<NB_THREADS;i++)
+      {
+         sem_post(&sem_begin[i]);
+      }
+      memset(W0_grad, 0, W0_size*sizeof(double));
+      memset(W1_grad, 0, W1_size*sizeof(double));
+      for (i=0;i<NB_THREADS;i++)
+      {
+         sem_wait(&sem_end[i]);
+         rms += args[i].rms;
+         error_rate += args[i].error_rate;
+         for (j=0;j<W0_size;j++)
+            W0_grad[j] += args[i].W0_grad[j];
+         for (j=0;j<W1_size;j++)
+            W1_grad[j] += args[i].W1_grad[j];
+      }
+
+      float mean_rate = 0, min_rate = 1e10;
+      rms = (rms/(outDim*nbSamples));
+      error_rate = (error_rate/(outDim*nbSamples));
+      fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms);
+      if (rms < best_rms)
+      {
+         /* New best: remember the weights and rates that produced it */
+         best_rms = rms;
+         for (i=0;i<W0_size;i++)
+         {
+            best_W0[i] = W0[i];
+            best_W0_rate[i] = W0_rate[i];
+         }
+         for (i=0;i<W1_size;i++)
+         {
+            best_W1[i] = W1[i];
+            best_W1_rate[i] = W1_rate[i];
+         }
+         count_worse=0;
+         count_retries=0;
+      } else {
+         count_worse++;
+         if (count_worse>30)
+         {
+            /* Stalled: go back to the best snapshot with smaller steps */
+            count_retries++;
+            count_worse=0;
+            for (i=0;i<W0_size;i++)
+            {
+               W0[i] = best_W0[i];
+               best_W0_rate[i] *= .7;
+               if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15;
+               W0_rate[i] = best_W0_rate[i];
+               W0_grad[i] = 0;
+            }
+            for (i=0;i<W1_size;i++)
+            {
+               W1[i] = best_W1[i];
+               best_W1_rate[i] *= .8;
+               if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15;
+               W1_rate[i] = best_W1_rate[i];
+               W1_grad[i] = 0;
+            }
+         }
+      }
+      if (count_retries>10)
+         break;
+      /* Adapt each rate from the sign agreement of successive gradients,
+         then take the step */
+      for (i=0;i<W0_size;i++)
+      {
+         if (W0_oldgrad[i]*W0_grad[i] > 0)
+            W0_rate[i] *= 1.01;
+         else if (W0_oldgrad[i]*W0_grad[i] < 0)
+            W0_rate[i] *= .9;
+         mean_rate += W0_rate[i];
+         if (W0_rate[i] < min_rate)
+            min_rate = W0_rate[i];
+         if (W0_rate[i] < 1e-15)
+            W0_rate[i] = 1e-15;
+         /*if (W0_rate[i] > .01)
+            W0_rate[i] = .01;*/
+         W0_oldgrad[i] = W0_grad[i];
+         W0_old2[i] = W0_old[i];
+         W0_old[i] = W0[i];
+         W0[i] += W0_grad[i]*W0_rate[i];
+      }
+      for (i=0;i<W1_size;i++)
+      {
+         if (W1_oldgrad[i]*W1_grad[i] > 0)
+            W1_rate[i] *= 1.01;
+         else if (W1_oldgrad[i]*W1_grad[i] < 0)
+            W1_rate[i] *= .9;
+         mean_rate += W1_rate[i];
+         if (W1_rate[i] < min_rate)
+            min_rate = W1_rate[i];
+         if (W1_rate[i] < 1e-15)
+            W1_rate[i] = 1e-15;
+         W1_oldgrad[i] = W1_grad[i];
+         W1_old2[i] = W1_old[i];
+         W1_old[i] = W1[i];
+         W1[i] += W1_grad[i]*W1_rate[i];
+      }
+      mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2];
+      fprintf (stderr, "%g %d", mean_rate, e);
+      if (count_retries)
+         fprintf(stderr, " %d", count_retries);
+      fprintf(stderr, "\n");
+      if (stopped)
+         break;
+   }
+   /* Ask every worker to exit, wait for it, then release its semaphores */
+   for (i=0;i<NB_THREADS;i++)
+   {
+      args[i].done = 1;
+      sem_post(&sem_begin[i]);
+      pthread_join(thread[i], NULL);
+      fprintf (stderr, "joined %d\n", i);
+      sem_destroy(&sem_begin[i]);
+      sem_destroy(&sem_end[i]);
+   }
+   /* Release every scratch buffer allocated above */
+   free(W0_old);
+   free(W1_old);
+   free(W0_old2);
+   free(W1_old2);
+   free(W0_grad);
+   free(W1_grad);
+   free(W0_oldgrad);
+   free(W1_oldgrad);
+   free(W0_rate);
+   free(W1_rate);
+   free(best_W0_rate);
+   free(best_W1_rate);
+   return best_rms;
+}
+
+/* Command-line trainer.
+
+   usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>
+
+   Reads up to <nb samples> records from stdin, each made of <inputs> floats
+   followed by <outputs> floats, trains a 3-layer network on them and prints
+   the resulting coefficient tables to stdout as C source. Progress and
+   diagnostics go to stderr. SIGTERM, SIGINT and SIGHUP stop training early.
+   Returns 0 on success and 1 on bad arguments, allocation failure or empty
+   input. */
+int main(int argc, char **argv)
+{
+   int i, j;
+   int nbInputs;
+   int nbOutputs;
+   int nbHidden;
+   int nbSamples;
+   int nbEpoch;
+   int nbRealInputs;
+   unsigned int seed;
+   float rms;
+   float *inputs;
+   float *outputs;
+   if (argc!=6)
+   {
+      fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n");
+      return 1;
+   }
+   nbInputs = atoi(argv[1]);
+   nbHidden = atoi(argv[2]);
+   nbOutputs = atoi(argv[3]);
+   nbSamples = atoi(argv[4]);
+   nbEpoch = atoi(argv[5]);
+   /* compute_gradient() keeps hidden and output activations in fixed
+      MAX_NEURONS-sized arrays, so larger layers must be rejected here */
+   if (nbInputs<1 || nbHidden<1 || nbHidden>MAX_NEURONS
+         || nbOutputs<1 || nbOutputs>MAX_NEURONS || nbSamples<1)
+   {
+      fprintf (stderr, "invalid dimensions (hidden and output sizes must be between 1 and %d)\n", MAX_NEURONS);
+      return 1;
+   }
+   nbRealInputs = nbInputs;
+   inputs = malloc((size_t)nbInputs*nbSamples*sizeof(*inputs));
+   outputs = malloc((size_t)nbOutputs*nbSamples*sizeof(*outputs));
+   if (inputs==NULL || outputs==NULL)
+   {
+      fprintf (stderr, "out of memory\n");
+      free(inputs);
+      free(outputs);
+      return 1;
+   }
+
+   seed = time(NULL);
+   fprintf (stderr, "Seed is %u\n", seed);
+   srand(seed);
+   build_tansig_table();
+   signal(SIGTERM, handler);
+   signal(SIGINT, handler);
+   signal(SIGHUP, handler);
+   for (i=0;i<nbSamples;i++)
+   {
+      /* A record counts only if every field converted. Testing the scanf()
+         result (rather than feof()) keeps a final record that is not
+         followed by a newline and stops cleanly on malformed input. */
+      int complete = 1;
+      for (j=0;complete && j<nbRealInputs;j++)
+         complete = scanf(" %f", &inputs[i*nbInputs+j]) == 1;
+      for (j=0;complete && j<nbOutputs;j++)
+         complete = scanf(" %f", &outputs[i*nbOutputs+j]) == 1;
+      if (!complete)
+      {
+         nbSamples = i;
+         break;
+      }
+   }
+   int topo[3] = {nbInputs, nbHidden, nbOutputs};
+   MLPTrain *net;
+
+   fprintf (stderr, "Got %d samples\n", nbSamples);
+   if (nbSamples<1)
+   {
+      fprintf (stderr, "no samples read\n");
+      free(inputs);
+      free(outputs);
+      return 1;
+   }
+   net = mlp_init(topo, 3, inputs, outputs, nbSamples);
+   rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1);
+   printf ("#include \"mlp.h\"\n\n");
+   printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed);
+   printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]);
+   printf ("\n/* hidden layer */\n");
+   for (i=0;i<(topo[0]+1)*topo[1];i++)
+   {
+      printf ("%g, ", net->weights[0][i]);
+      if (i%5==4)
+         printf("\n");
+   }
+   printf ("\n/* output layer */\n");
+   for (i=0;i<(topo[1]+1)*topo[2];i++)
+   {
+      printf ("%g, ", net->weights[1][i]);
+      if (i%5==4)
+         printf("\n");
+   }
+   printf ("};\n\n");
+   printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]);
+   printf ("const MLP net = {\n");
+   printf ("\t3,\n");
+   printf ("\ttopo,\n");
+   printf ("\tweights\n};\n");
+   free(inputs);
+   free(outputs);
+   return 0;
+}
diff --git a/src/mlp_train.h b/src/mlp_train.h
new file mode 100644
index 00000000..1857f644
--- /dev/null
+++ b/src/mlp_train.h
@@ -0,0 +1,86 @@
+/* Copyright (c) 2008-2011 Octasic Inc.
+ Written by Jean-Marc Valin */
+/*
+ Redistribution and use in source and binary forms, with or without
+ modification, are permitted provided that the following conditions
+ are met:
+
+ - Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+
+ - Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+
+ THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR
+ CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+ LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+ NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+#ifndef _MLP_TRAIN_H_
+#define _MLP_TRAIN_H_
+
+#include <math.h>
+#include <stdlib.h>
+
+double tansig_table[501];
+/* Reference tanh(x), evaluated in its logistic form 2/(1+exp(-2x)) - 1.
+   Used to fill tansig_table. */
+static inline double tansig_double(double x)
+{
+   const double decay = exp(-2.*x);
+   return 2./(1.+decay) - 1.;
+}
+/* Fills the 501-entry tansig_table with tansig_double() sampled every 0.04
+   from -10 (entry 0) to +10 (entry 500). Must run before tansig_approx(). */
+static inline void build_tansig_table()
+{
+   int slot = 0;
+   while (slot < 501)
+   {
+      tansig_table[slot] = tansig_double(.04*(slot-250));
+      slot++;
+   }
+}
+
+/* Fast tanh(x): nearest entry of tansig_table (0.04 grid centred on entry
+   250) plus a correction built from the slope 1 - y*y at that entry.
+   Inputs at or beyond +/-10 return +/-1 directly. */
+static inline double tansig_approx(double x)
+{
+   double base, slope, frac;
+   int idx;
+   if (x>=10)
+      return 1;
+   if (x<=-10)
+      return -1;
+   idx = lrint(25*x);
+   /* Signed distance from x to the chosen grid point */
+   frac = x - .04*idx;
+   base = tansig_table[250+idx];
+   slope = 1-base*base;
+   return base + frac*slope*(1 - base*frac);
+}
+
+/* Draws one normally distributed value with standard deviation sd, using
+   rand() and the polar rejection method: pairs (U1,U2) uniform in [-1,1] are
+   drawn until they fall strictly inside the unit circle (origin excluded).
+   Declared static inline: a plain `inline` definition in a header provides
+   no external definition under C99 rules, so the link fails whenever the
+   compiler chooses not to inline a call. */
+static inline float randn(float sd)
+{
+   float U1, U2, S, x;
+   do {
+      U1 = ((float)rand())/RAND_MAX;
+      U2 = ((float)rand())/RAND_MAX;
+      U1 = 2*U1-1;
+      U2 = 2*U2-1;
+      S = U1*U1 + U2*U2;
+   } while (S >= 1 || S == 0.0f);
+   x = sd*sqrt(-2 * log(S) / S) * U1;
+   return x;
+}
+
+
+/* Mutable network state used by the training tool (mlp_train.c). */
+typedef struct {
+ /* Number of layers (entries in topo) */
+ int layers;
+ /* Layer sizes, input layer first */
+ int *topo;
+ /* weights[i] holds the coefficients between layer i and layer i+1:
+ topo[i+1] rows of (topo[i]+1) values, bias first in each row */
+ double **weights;
+ /* Same shape as weights; receives the best-scoring snapshot during training */
+ double **best_weights;
+ /* Per-input learning-rate scale for the first layer; entry 0 applies to
+ the bias and entry 1+j to input j */
+ double *in_rate;
+} MLPTrain;
+
+
+#endif /* _MLP_TRAIN_H_ */
diff --git a/src/opus_encoder.c b/src/opus_encoder.c
index aae31256..4b6995d1 100644
--- a/src/opus_encoder.c
+++ b/src/opus_encoder.c
@@ -40,7 +40,8 @@
#include "arch.h"
#include "opus_private.h"
#include "os_support.h"
-
+#include "analysis.h"
+#include "mathops.h"
#include "tuning_parameters.h"
#ifdef FIXED_POINT
#include "fixed/structs_FIX.h"
@@ -84,7 +85,9 @@ struct OpusEncoder {
/* Sampling rate (at the API level) */
int first;
opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2];
-
+#ifndef FIXED_POINT
+ TonalityAnalysisState analysis;
+#endif
opus_uint32 rangeFinal;
};
@@ -365,6 +368,56 @@ static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou
#endif
}
+#ifdef FIXED_POINT
+/* Fixed-point DC rejection: two cascaded first-order stages per channel.
+ Each stage outputs its input minus a running state, then moves that state
+ toward the input by a 2^-shift fraction of the difference.
+ in/out are interleaved (sample i of channel c is at channels*i+c) and
+ hp_mem holds two state values per channel (indices 2*c and 2*c+1).
+ cutoff_Hz must be non-zero since it is used as a divisor. */
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+ int c, i;
+ int shift;
+
+ /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */
+ shift=celt_ilog2(Fs/(cutoff_Hz*3));
+ for (c=0;c<channels;c++)
+ {
+ for (i=0;i<len;i++)
+ {
+ opus_val32 x, tmp, y;
+ /* Work with 15 extra fractional bits */
+ x = SHL32(EXTEND32(in[channels*i+c]), 15);
+ /* First stage */
+ tmp = x-hp_mem[2*c];
+ hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift);
+ /* Second stage */
+ y = tmp - hp_mem[2*c+1];
+ hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift);
+ /* Round back to 16 bits and clamp to +/-32767 */
+ out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767));
+ }
+ }
+}
+
+#else
+/* Floating-point DC rejection: two cascaded first-order stages per channel.
+   Each stage outputs its input minus a running state, then moves that state
+   toward the input by the fraction 4*cutoff_Hz/Fs of the difference.
+   in/out are interleaved (sample n of channel ch is at channels*n+ch) and
+   hp_mem holds two state values per channel. */
+static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs)
+{
+   int ch;
+   const float alpha = 4.*cutoff_Hz/Fs;
+
+   for (ch=0;ch<channels;ch++)
+   {
+      /* The two filter states belonging to this channel */
+      opus_val32 *state = hp_mem + 2*ch;
+      int n;
+      for (n=0;n<len;n++)
+      {
+         const int pos = channels*n+ch;
+         opus_val32 sample, stage1, stage2;
+         sample = in[pos];
+         /* First stage */
+         stage1 = sample-state[0];
+         state[0] = state[0] + alpha*(sample - state[0]);
+         /* Second stage */
+         stage2 = stage1 - state[1];
+         state[1] = state[1] + alpha*(stage1 - state[1]);
+         out[pos] = stage2;
+      }
+   }
+}
+#endif
+
static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2,
int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs)
{
@@ -472,6 +525,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
opus_int32 max_rate; /* Max bitrate we're allowed to use */
int curr_bandwidth;
opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */
+ int extra_buffer, total_buffer;
+ int perform_analysis=0;
+#ifndef FIXED_POINT
+ AnalysisInfo analysis_info;
+#endif
VARDECL(opus_val16, tmp_prefill);
ALLOC_STACK;
@@ -493,11 +551,20 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
silk_enc = (char*)st+st->silk_enc_offset;
celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset);
+#ifndef FIXED_POINT
+ perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000;
+#endif
if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY)
delay_compensation = 0;
else
delay_compensation = st->delay_compensation;
-
+ if (perform_analysis)
+ {
+ total_buffer = IMAX(st->Fs/200, delay_compensation);
+ } else {
+ total_buffer = delay_compensation;
+ }
+ extra_buffer = total_buffer-delay_compensation;
st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes);
frame_rate = st->Fs/frame_size;
@@ -839,9 +906,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
ec_enc_init(&enc, data, max_data_bytes-1);
- ALLOC(pcm_buf, (delay_compensation+frame_size)*st->channels, opus_val16);
- for (i=0;i<delay_compensation*st->channels;i++)
- pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-delay_compensation)*st->channels+i];
+ ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16);
+ for (i=0;i<total_buffer*st->channels;i++)
+ pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i];
if (st->mode == MODE_CELT_ONLY)
hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 );
@@ -856,12 +923,26 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
if (st->application == OPUS_APPLICATION_VOIP)
{
- hp_cutoff(pcm, cutoff_Hz, &pcm_buf[delay_compensation*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
+ hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
} else {
- for (i=0;i<frame_size*st->channels;i++)
- pcm_buf[delay_compensation*st->channels + i] = pcm[i];
+ dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs);
}
+#ifndef FIXED_POINT
+ if (perform_analysis)
+ {
+ int nb_analysis_frames;
+ nb_analysis_frames = frame_size/(st->Fs/100);
+ for (i=0;i<nb_analysis_frames;i++)
+ tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels);
+ if (st->signal_type == OPUS_AUTO)
+ st->voice_ratio = floor(.5+100*(1-analysis_info.music_prob));
+ } else {
+ analysis_info.valid = 0;
+ st->voice_ratio = -1;
+ }
+#endif
+
/* SILK processing */
if (st->mode != MODE_CELT_ONLY)
{
@@ -965,10 +1046,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
}
#ifdef FIXED_POINT
- pcm_silk = pcm_buf+delay_compensation*st->channels;
+ pcm_silk = pcm_buf+total_buffer*st->channels;
#else
for (i=0;i<frame_size*st->channels;i++)
- pcm_silk[i] = FLOAT2INT16(pcm_buf[delay_compensation*st->channels + i]);
+ pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]);
#endif
ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 );
if( ret ) {
@@ -1070,13 +1151,13 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0)
{
for (i=0;i<st->channels*st->Fs/400;i++)
- tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels + i];
+ tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i];
}
- for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+delay_compensation));i++)
+ for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++)
st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size];
for (;i<st->encoder_buffer*st->channels;i++)
- st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i];
+ st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i];
if (st->mode != MODE_HYBRID || st->stream_channels==1)
@@ -1097,7 +1178,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
g1 *= (1.f/16384);
g2 *= (1.f/16384);
#endif
- stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap,
+ stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap,
frame_size, st->channels, celt_mode->window, st->Fs);
st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14;
}
@@ -1151,7 +1232,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
int err;
celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0));
celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0));
- err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
+ err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL);
if (err < 0)
{
RESTORE_STACK;
@@ -1177,7 +1258,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
/* If false, we already busted the budget and we'll end up with a "PLC packet" */
if (ec_tell(&enc) <= 8*nb_compr_bytes)
{
- ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc);
+#ifndef FIXED_POINT
+ if (perform_analysis)
+ celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info));
+#endif
+ ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc);
if (ret < 0)
{
RESTORE_STACK;
@@ -1200,9 +1285,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s
celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0));
/* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */
- celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL);
+ celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL);
- err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
+ err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL);
if (err < 0)
{
RESTORE_STACK;
diff --git a/src/tansig_table.h b/src/tansig_table.h
new file mode 100644
index 00000000..a5aba230
--- /dev/null
+++ b/src/tansig_table.h
@@ -0,0 +1,45 @@
+/* This file is auto-generated by gen_tables.
+   tansig_table[i] matches tanh(0.04*i) for i = 0..200 (inputs 0 through 8, to 6 decimals). */
+static const opus_val16 tansig_table[201] = {
+0.000000, 0.039979, 0.079830, 0.119427, 0.158649,
+0.197375, 0.235496, 0.272905, 0.309507, 0.345214,
+0.379949, 0.413644, 0.446244, 0.477700, 0.507977,
+0.537050, 0.564900, 0.591519, 0.616909, 0.641077,
+0.664037, 0.685809, 0.706419, 0.725897, 0.744277,
+0.761594, 0.777888, 0.793199, 0.807569, 0.821040,
+0.833655, 0.845456, 0.856485, 0.866784, 0.876393,
+0.885352, 0.893698, 0.901468, 0.908698, 0.915420,
+0.921669, 0.927473, 0.932862, 0.937863, 0.942503,
+0.946806, 0.950795, 0.954492, 0.957917, 0.961090,
+0.964028, 0.966747, 0.969265, 0.971594, 0.973749,
+0.975743, 0.977587, 0.979293, 0.980869, 0.982327,
+0.983675, 0.984921, 0.986072, 0.987136, 0.988119,
+0.989027, 0.989867, 0.990642, 0.991359, 0.992020,
+0.992631, 0.993196, 0.993718, 0.994199, 0.994644,
+0.995055, 0.995434, 0.995784, 0.996108, 0.996407,
+0.996682, 0.996937, 0.997172, 0.997389, 0.997590,
+0.997775, 0.997946, 0.998104, 0.998249, 0.998384,
+0.998508, 0.998623, 0.998728, 0.998826, 0.998916,
+0.999000, 0.999076, 0.999147, 0.999213, 0.999273,
+0.999329, 0.999381, 0.999428, 0.999472, 0.999513,
+0.999550, 0.999585, 0.999617, 0.999646, 0.999673,
+0.999699, 0.999722, 0.999743, 0.999763, 0.999781,
+0.999798, 0.999813, 0.999828, 0.999841, 0.999853,
+0.999865, 0.999875, 0.999885, 0.999893, 0.999902,
+0.999909, 0.999916, 0.999923, 0.999929, 0.999934,
+0.999939, 0.999944, 0.999948, 0.999952, 0.999956,
+0.999959, 0.999962, 0.999965, 0.999968, 0.999970,
+0.999973, 0.999975, 0.999977, 0.999978, 0.999980,
+0.999982, 0.999983, 0.999984, 0.999986, 0.999987,
+0.999988, 0.999989, 0.999990, 0.999990, 0.999991,
+0.999992, 0.999992, 0.999993, 0.999994, 0.999994,
+0.999994, 0.999995, 0.999995, 0.999996, 0.999996,
+0.999996, 0.999997, 0.999997, 0.999997, 0.999997,
+0.999997, 0.999998, 0.999998, 0.999998, 0.999998,
+0.999998, 0.999998, 0.999999, 0.999999, 0.999999,
+0.999999, 0.999999, 0.999999, 0.999999, 0.999999,
+0.999999, 0.999999, 0.999999, 0.999999, 0.999999,
+1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+1.000000, 1.000000, 1.000000, 1.000000, 1.000000,
+1.000000,
+};