diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2012-10-09 03:07:06 -0400 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2012-10-09 03:07:06 -0400 |
commit | 7315b35e13a3a7c504ed6b1fe2d28ad500eb2701 (patch) | |
tree | c6ed1c6869b13c5e2514c3ff7cfda4ce350f3fc8 | |
parent | ca82894ef16bbd74839cb93e35486e5a3b90426d (diff) | |
parent | 317ffc203efc63333fc3b6a42fdb2887321a4325 (diff) | |
download | opus-7315b35e13a3a7c504ed6b1fe2d28ad500eb2701.tar.gz |
Merge branch 'exp_analysis7'
Conflicts:
celt/celt.c
celt/mdct.c
include/opus_defines.h
src/opus_encoder.c
-rw-r--r-- | Makefile.am | 1 | ||||
-rw-r--r-- | celt/bands.c | 15 | ||||
-rw-r--r-- | celt/bands.h | 2 | ||||
-rw-r--r-- | celt/celt.c | 682 | ||||
-rw-r--r-- | celt/celt.h | 24 | ||||
-rw-r--r-- | celt/mathops.h | 48 | ||||
-rw-r--r-- | celt/mdct.c | 6 | ||||
-rw-r--r-- | celt/pitch.c | 10 | ||||
-rw-r--r-- | celt/quant_bands.c | 8 | ||||
-rw-r--r-- | celt/quant_bands.h | 6 | ||||
-rw-r--r-- | opus_headers.mk | 5 | ||||
-rw-r--r-- | opus_sources.mk | 5 | ||||
-rw-r--r-- | src/analysis.c | 460 | ||||
-rw-r--r-- | src/analysis.h | 60 | ||||
-rw-r--r-- | src/mlp.c | 109 | ||||
-rw-r--r-- | src/mlp.h | 41 | ||||
-rw-r--r-- | src/mlp_data.c | 73 | ||||
-rw-r--r-- | src/mlp_train.c | 496 | ||||
-rw-r--r-- | src/mlp_train.h | 86 | ||||
-rw-r--r-- | src/opus_encoder.c | 123 | ||||
-rw-r--r-- | src/tansig_table.h | 45 |
21 files changed, 2070 insertions, 235 deletions
diff --git a/Makefile.am b/Makefile.am index db37d998..07876626 100644 --- a/Makefile.am +++ b/Makefile.am @@ -14,6 +14,7 @@ if FIXED_POINT SILK_SOURCES += $(SILK_SOURCES_FIXED) else SILK_SOURCES += $(SILK_SOURCES_FLOAT) +OPUS_SOURCES += $(OPUS_SOURCES_FLOAT) endif include celt_headers.mk diff --git a/celt/bands.c b/celt/bands.c index f38b6626..531d3118 100644 --- a/celt/bands.c +++ b/celt/bands.c @@ -41,6 +41,21 @@ #include "mathops.h" #include "rate.h" +int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev) +{ + int i; + for (i=0;i<N;i++) + { + if (val < thresholds[i]) + break; + } + if (i>prev && val < thresholds[prev]+hysteresis[prev]) + i=prev; + if (i<prev && val > thresholds[prev-1]-hysteresis[prev-1]) + i=prev; + return i; +} + opus_uint32 celt_lcg_rand(opus_uint32 seed) { return 1664525 * seed + 1013904223; diff --git a/celt/bands.h b/celt/bands.h index 9ff8ffd7..47d15b6d 100644 --- a/celt/bands.h +++ b/celt/bands.h @@ -92,4 +92,6 @@ void anti_collapse(const CELTMode *m, celt_norm *X_, unsigned char *collapse_mas opus_uint32 celt_lcg_rand(opus_uint32 seed); +int hysteresis_decision(opus_val16 val, const opus_val16 *thresholds, const opus_val16 *hysteresis, int N, int prev); + #endif /* BANDS_H */ diff --git a/celt/celt.c b/celt/celt.c index 28c228d0..7580fa2d 100644 --- a/celt/celt.c +++ b/celt/celt.c @@ -178,6 +178,7 @@ struct OpusCustomEncoder { int prefilter_tapset_old; #endif int consec_transient; + AnalysisInfo analysis; opus_val32 preemph_memE[2]; opus_val32 preemph_memD[2]; @@ -187,6 +188,9 @@ struct OpusCustomEncoder { opus_int32 vbr_drift; opus_int32 vbr_offset; opus_int32 vbr_count; + opus_val16 overlap_max; + opus_val16 stereo_saving; + int intensity; #ifdef RESYNTH celt_sig syn_mem[2][2*MAX_PERIOD]; @@ -303,92 +307,128 @@ static inline opus_val16 SIG2WORD16(celt_sig x) } static int transient_analysis(const opus_val32 * OPUS_RESTRICT in, int len, int C, - int overlap) + int overlap, 
opus_val16 *tf_estimate, int *tf_chan, AnalysisInfo *analysis) { int i; VARDECL(opus_val16, tmp); - opus_val32 mem0=0,mem1=0; + opus_val32 mem0,mem1; int is_transient = 0; int block; - int N; + int c, N; + opus_val16 maxbin; + int tf_max; VARDECL(opus_val16, bins); + opus_val16 T1, T2, T3, T4, T5; + opus_val16 follower; + int metric=0; + int fmetric=0, bmetric=0; + int count1, count2, count3, count4, count5;; + SAVE_STACK; ALLOC(tmp, len, opus_val16); - block = overlap/2; - N=len/block; + block = overlap/4; + N=len/block-1; ALLOC(bins, N, opus_val16); - if (C==1) + + tf_max = 0; + for (c=0;c<C;c++) { + mem0=0; + mem1=0; for (i=0;i<len;i++) - tmp[i] = SHR32(in[i],SIG_SHIFT); - } else { - for (i=0;i<len;i++) - tmp[i] = SHR32(ADD32(in[i],in[i+len]), SIG_SHIFT+1); - } + tmp[i] = SHR32(in[i+c*len],SIG_SHIFT); - /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ - for (i=0;i<len;i++) - { - opus_val32 x,y; - x = tmp[i]; - y = ADD32(mem0, x); + /* High-pass filter: (1 - 2*z^-1 + z^-2) / (1 - z^-1 + .5*z^-2) */ + for (i=0;i<len;i++) + { + opus_val32 x,y; + x = tmp[i]; + y = ADD32(mem0, x); #ifdef FIXED_POINT - mem0 = mem1 + y - SHL32(x,1); - mem1 = x - SHR32(y,1); + mem0 = mem1 + y - SHL32(x,1); + mem1 = x - SHR32(y,1); #else - mem0 = mem1 + y - 2*x; - mem1 = x - .5f*y; + mem0 = mem1 + y - 2*x; + mem1 = x - .5f*y; #endif - tmp[i] = EXTRACT16(SHR32(y,2)); - } - /* First few samples are bad because we don't propagate the memory */ - for (i=0;i<12;i++) - tmp[i] = 0; + tmp[i] = EXTRACT16(SHR(y,2)); + } + /* First few samples are bad because we don't propagate the memory */ + for (i=0;i<12;i++) + tmp[i] = 0; - for (i=0;i<N;i++) - { - int j; - opus_val16 max_abs=0; - for (j=0;j<block;j++) - max_abs = MAX16(max_abs, ABS16(tmp[i*block+j])); - bins[i] = max_abs; - } - for (i=0;i<N;i++) - { - int j; - int conseq=0; - opus_val16 t1, t2, t3; - - t1 = MULT16_16_Q15(QCONST16(.15f, 15), bins[i]); - t2 = MULT16_16_Q15(QCONST16(.4f, 15), bins[i]); - t3 = 
MULT16_16_Q15(QCONST16(.15f, 15), bins[i]); - for (j=0;j<i;j++) - { - if (bins[j] < t1) - conseq++; - if (bins[j] < t2) - conseq++; - else - conseq = 0; + maxbin=0; + for (i=0;i<N;i++) + { + int j; + opus_val16 max_abs=0; + for (j=0;j<2*block;j++) + max_abs = MAX16(max_abs, ABS16(tmp[i*block+j])); + bins[i] = max_abs; + maxbin = MAX16(maxbin, bins[i]); } - if (conseq>=3) + + T1 = QCONST16(.09f, 15); + T2 = QCONST16(.12f, 15); + T3 = QCONST16(.18f, 15); + T4 = QCONST16(.28f, 15); + T5 = QCONST16(.4f, 15); + + follower = 0; + count1=count2=count3=count4=count5=0; + for (i=0;i<N;i++) + { + follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower)); + if (bins[i] < MULT16_16_Q15(T1, follower)) + count1++; + if (bins[i] < MULT16_16_Q15(T2, follower)) + count2++; + if (bins[i] < MULT16_16_Q15(T3, follower)) + count3++; + if (bins[i] < MULT16_16_Q15(T4, follower)) + count4++; + if (bins[i] < MULT16_16_Q15(T5, follower)) + count5++; + } + fmetric = (5*count1 + 4*count2 + 3*count3 + 2*count4 + count5)/2; + follower=0; + count1=count2=count3=count4=count5=0; + for (i=N-1;i>=0;i--) + { + follower = MAX16(bins[i], MULT16_16_Q15(QCONST16(0.97f, 15), follower)); + if (bins[i] < MULT16_16_Q15(T1, follower)) + count1++; + if (bins[i] < MULT16_16_Q15(T2, follower)) + count2++; + if (bins[i] < MULT16_16_Q15(T3, follower)) + count3++; + if (bins[i] < MULT16_16_Q15(T4, follower)) + count4++; + if (bins[i] < MULT16_16_Q15(T5, follower)) + count5++; + } + bmetric = 5*count1 + 4*count2 + 3*count3 + 2*count4 + count5; + metric = fmetric+bmetric; + + /*if (metric>40)*/ + if (metric>20+50*MAX16(analysis->tonality, analysis->noisiness)) is_transient=1; - conseq = 0; - for (j=i+1;j<N;j++) + + if (metric>tf_max) { - if (bins[j] < t3) - conseq++; - else - conseq = 0; + *tf_chan = c; + tf_max = metric; } - if (conseq>=7) - is_transient=1; } + /* *tf_estimate = 1 + MIN16(1, sqrt(MAX16(0, tf_max-30))/20); */ + *tf_estimate = QCONST16(1.f, 14) + celt_sqrt(MAX16(0, 
SHL32(MULT16_16(QCONST16(0.0069,14),IMIN(163,tf_max)),14)-QCONST32(0.139,28))); + RESTORE_STACK; #ifdef FUZZING is_transient = rand()&0x1; #endif + /*printf("%d %f %f %f %f\n", is_transient, *tf_estimate, tf_max, analysis->tonality, analysis->noisiness);*/ return is_transient; } @@ -545,34 +585,22 @@ static const signed char tf_select_table[4][8] = { {0, -2, 0, -3, 3, 0, 1,-1}, }; -static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, int width) +static opus_val32 l1_metric(const celt_norm *tmp, int N, int LM, opus_val16 bias) { - int i, j; - static const opus_val16 sqrtM_1[4] = {Q15ONE, QCONST16(.70710678f,15), QCONST16(0.5f,15), QCONST16(0.35355339f,15)}; + int i; opus_val32 L1; - opus_val16 bias; - L1=0; - for (i=0;i<1<<LM;i++) - { - opus_val32 L2 = 0; - for (j=0;j<N>>LM;j++) - L2 = MAC16_16(L2, tmp[(j<<LM)+i], tmp[(j<<LM)+i]); - L1 += celt_sqrt(L2); - } - L1 = MULT16_32_Q15(sqrtM_1[LM], L1); - if (width==1) - bias = QCONST16(.12f,15)*LM; - else if (width==2) - bias = QCONST16(.05f,15)*LM; - else - bias = QCONST16(.02f,15)*LM; - L1 = MAC16_32_Q15(L1, bias, L1); + L1 = 0; + for (i=0;i<N;i++) + L1 += EXTEND32(ABS16(tmp[i])); + /* When in doubt, prefer good freq resolution */ + L1 = MAC16_32_Q15(L1, LM*bias, L1); return L1; + } static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, int *tf_res, int nbCompressedBytes, celt_norm *X, int N0, int LM, - int *tf_sum) + int *tf_sum, opus_val16 tf_estimate, int tf_chan) { int i; VARDECL(int, metric); @@ -581,9 +609,16 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, VARDECL(int, path0); VARDECL(int, path1); VARDECL(celt_norm, tmp); + VARDECL(celt_norm, tmp_1); int lambda; + int sel; + int selcost[2]; int tf_select=0; + opus_val16 bias; + SAVE_STACK; + bias = MULT16_16_Q14(QCONST16(.04f,15), MAX16(-QCONST16(.25f,14), QCONST16(1.5f,14)-tf_estimate)); + /*printf("%f ", bias);*/ if (nbCompressedBytes<15*C) { @@ -600,9 +635,10 @@ static int tf_analysis(const CELTMode 
*m, int len, int C, int isTransient, lambda = 4; else lambda = 3; - + lambda*=2; ALLOC(metric, len, int); ALLOC(tmp, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); + ALLOC(tmp_1, (m->eBands[len]-m->eBands[len-1])<<LM, celt_norm); ALLOC(path0, len, int); ALLOC(path1, len, int); @@ -610,19 +646,35 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, for (i=0;i<len;i++) { int j, k, N; + int narrow; opus_val32 L1, best_L1; int best_level=0; N = (m->eBands[i+1]-m->eBands[i])<<LM; + /* band is too narrow to be split down to LM=-1 */ + narrow = (m->eBands[i+1]-m->eBands[i])==1; for (j=0;j<N;j++) - tmp[j] = X[j+(m->eBands[i]<<LM)]; + tmp[j] = X[tf_chan*N0 + j+(m->eBands[i]<<LM)]; /* Just add the right channel if we're in stereo */ - if (C==2) + /*if (C==2) for (j=0;j<N;j++) - tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1)); - L1 = l1_metric(tmp, N, isTransient ? LM : 0, N>>LM); + tmp[j] = ADD16(SHR16(tmp[j], 1),SHR16(X[N0+j+(m->eBands[i]<<LM)], 1));*/ + L1 = l1_metric(tmp, N, isTransient ? LM : 0, bias); best_L1 = L1; + /* Check the -1 case for transients */ + if (isTransient && !narrow) + { + for (j=0;j<N;j++) + tmp_1[j] = tmp[j]; + haar1(tmp_1, N>>LM, 1<<LM); + L1 = l1_metric(tmp_1, N, LM+1, bias); + if (L1<best_L1) + { + best_L1 = L1; + best_level = -1; + } + } /*printf ("%f ", L1);*/ - for (k=0;k<LM;k++) + for (k=0;k<LM+!(isTransient||narrow);k++) { int B; @@ -631,12 +683,9 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, else B = k+1; - if (isTransient) - haar1(tmp, N>>(LM-k), 1<<(LM-k)); - else - haar1(tmp, N>>k, 1<<k); + haar1(tmp, N>>k, 1<<k); - L1 = l1_metric(tmp, N, B, N>>LM); + L1 = l1_metric(tmp, N, B, bias); if (L1 < best_L1) { @@ -645,17 +694,40 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, } } /*printf ("%d ", isTransient ? 
LM-best_level : best_level);*/ + /* metric is in Q1 to be able to select the mid-point (-0.5) for narrower bands */ if (isTransient) - metric[i] = best_level; + metric[i] = 2*best_level; else - metric[i] = -best_level; - *tf_sum += metric[i]; + metric[i] = -2*best_level; + *tf_sum += (isTransient ? LM : 0) - metric[i]/2; + /* For bands that can't be split to -1, set the metric to the half-way point to avoid + biasing the decision */ + if (narrow && (metric[i]==0 || metric[i]==-2*LM)) + metric[i]-=1; + /*printf("%d ", metric[i]);*/ } /*printf("\n");*/ - /* NOTE: Future optimized implementations could detect extreme transients and set - tf_select = 1 but so far we have not found a reliable way of making this useful */ + /* Search for the optimal tf resolution, including tf_select */ tf_select = 0; - + for (sel=0;sel<2;sel++) + { + cost0 = 0; + cost1 = isTransient ? 0 : lambda; + for (i=1;i<len;i++) + { + int curr0, curr1; + curr0 = IMIN(cost0, cost1 + lambda); + curr1 = IMIN(cost0 + lambda, cost1); + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*sel+1]); + } + cost0 = IMIN(cost0, cost1); + selcost[sel]=cost0; + } + /* For now, we're conservative and only allow tf_select=1 for transients. + * If tests confirm it's useful for non-transients, we could allow it. */ + if (selcost[1]<selcost[0] && isTransient) + tf_select=1; cost0 = 0; cost1 = isTransient ? 
0 : lambda; /* Viterbi forward pass */ @@ -685,8 +757,8 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, curr1 = from1; path1[i]= 1; } - cost0 = curr0 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+0]); - cost1 = curr1 + abs(metric[i]-tf_select_table[LM][4*isTransient+2*tf_select+1]); + cost0 = curr0 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+0]); + cost1 = curr1 + abs(metric[i]-2*tf_select_table[LM][4*isTransient+2*tf_select+1]); } tf_res[len-1] = cost0 < cost1 ? 0 : 1; /* Viterbi backward pass to check the decisions */ @@ -697,6 +769,7 @@ static int tf_analysis(const CELTMode *m, int len, int C, int isTransient, else tf_res[i] = path0[i+1]; } + /*printf("%d %f\n", *tf_sum, tf_estimate);*/ RESTORE_STACK; #ifdef FUZZING tf_select = rand()&0x1; @@ -744,7 +817,7 @@ static void tf_encode(int start, int end, int isTransient, int *tf_res, int LM, tf_select = 0; for (i=start;i<end;i++) tf_res[i] = tf_select_table[LM][4*isTransient+2*tf_select+tf_res[i]]; - /*printf("%d %d ", isTransient, tf_select); for(i=0;i<end;i++)printf("%d ", tf_res[i]);printf("\n");*/ + /*for(i=0;i<end;i++)printf("%d ", isTransient ? 
tf_res[i] : LM+tf_res[i]);printf("\n");*/ } static void tf_decode(int start, int end, int isTransient, int *tf_res, int LM, ec_dec *dec) @@ -798,15 +871,20 @@ static void init_caps(const CELTMode *m,int *cap,int LM,int C) } static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, - const opus_val16 *bandLogE, int end, int LM, int C, int N0) + const opus_val16 *bandLogE, int end, int LM, int C, int N0, + AnalysisInfo *analysis, opus_val16 *stereo_saving, opus_val16 tf_estimate, + int intensity) { int i; opus_val32 diff=0; int c; int trim_index = 5; + opus_val16 trim = QCONST16(5.f, 8); + opus_val16 logXC, logXC2; if (C==2) { opus_val16 sum = 0; /* Q10 */ + opus_val16 minXC; /* Q10 */ /* Compute inter-channel correlation for low frequencies */ for (i=0;i<8;i++) { @@ -817,6 +895,15 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, sum = ADD16(sum, EXTRACT16(SHR32(partial, 18))); } sum = MULT16_16_Q15(QCONST16(1.f/8, 15), sum); + minXC = sum; + for (i=8;i<intensity;i++) + { + int j; + opus_val32 partial = 0; + for (j=m->eBands[i]<<LM;j<m->eBands[i+1]<<LM;j++) + partial = MAC16_16(partial, X[j], X[N0+j]); + minXC = MIN16(minXC, EXTRACT16(SHR32(partial, 18))); + } /*printf ("%f\n", sum);*/ if (sum > QCONST16(.995f,10)) trim_index-=4; @@ -826,18 +913,28 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim_index-=2; else if (sum > QCONST16(.8f,10)) trim_index-=1; + /* mid-side savings estimations based on the LF average*/ + logXC = celt_log2(QCONST32(1.001f, 20)-MULT16_16(sum, sum)); + /* mid-side savings estimations based on min correlation */ + logXC2 = MAX16(HALF16(logXC), celt_log2(QCONST32(1.001f, 20)-MULT16_16(minXC, minXC))); +#ifdef FIXED_POINT + /* Compensate for Q20 vs Q14 input and convert output to Q8 */ + logXC = PSHR32(logXC-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); + logXC2 = PSHR32(logXC2-QCONST16(6.f, DB_SHIFT),DB_SHIFT-8); +#endif + + trim += MAX16(-QCONST16(4.f, 8), 
MULT16_16_Q15(QCONST16(.75f,15),logXC)); + *stereo_saving = MIN16(*stereo_saving + QCONST16(0.25f, 8), -HALF16(logXC2)); } /* Estimate spectral tilt */ c=0; do { for (i=0;i<end-1;i++) { - diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-m->nbEBands); + diff += bandLogE[i+c*m->nbEBands]*(opus_int32)(2+2*i-end); } } while (++c<C); - /* We divide by two here to avoid making the tilt larger for stereo as a - result of a bug in the loop above */ - diff /= 2*C*(end-1); + diff /= C*(end-1); /*printf("%f\n", diff);*/ if (diff > QCONST16(2.f, DB_SHIFT)) trim_index--; @@ -847,11 +944,25 @@ static int alloc_trim_analysis(const CELTMode *m, const celt_norm *X, trim_index++; if (diff < -QCONST16(10.f, DB_SHIFT)) trim_index++; + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), SHR16(diff+QCONST16(1.f, DB_SHIFT),DB_SHIFT-8)/6 )); + trim -= 2*SHR16(tf_estimate-QCONST16(1.f,14), 14-8); +#ifndef FIXED_POINT + if (analysis->valid) + { + trim -= MAX16(-QCONST16(2.f, 8), MIN16(QCONST16(2.f, 8), 2*(analysis->tonality_slope+.05))); + } +#endif +#ifdef FIXED_POINT + trim_index = PSHR32(trim, 8); +#else + trim_index = floor(.5+trim); +#endif if (trim_index<0) trim_index = 0; if (trim_index>10) trim_index = 10; + /*printf("%d\n", trim_index);*/ #ifdef FUZZING trim_index = rand()%11; #endif @@ -900,6 +1011,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, VARDECL(celt_norm, X); VARDECL(celt_ener, bandE); VARDECL(opus_val16, bandLogE); + VARDECL(opus_val16, bandLogE2); VARDECL(int, fine_quant); VARDECL(opus_val16, error); VARDECL(int, pulses); @@ -923,7 +1035,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int alloc_trim; int pitch_index=COMBFILTER_MINPERIOD; opus_val16 gain1 = 0; - int intensity=0; int dual_stereo=0; int effectiveBytes; opus_val16 pf_threshold; @@ -938,8 +1049,15 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, int anti_collapse_rsv; int anti_collapse_on=0; 
int silence=0; + int tf_chan = 0; + opus_val16 tf_estimate; + int pitch_change=0; + opus_int32 tot_boost=0; + opus_val16 sample_max; + opus_val16 maxDepth; ALLOC_STACK; + tf_estimate = QCONST16(1.0f,14); if (nbCompressedBytes<2 || pcm==NULL) return OPUS_BAD_ARG; @@ -1054,6 +1172,9 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(in, CC*(N+st->overlap), celt_sig); + sample_max=MAX16(st->overlap_max, celt_maxabs16(pcm, C*(N-st->mode->overlap))); + st->overlap_max=celt_maxabs16(pcm+C*(N-st->mode->overlap), C*st->mode->overlap); + sample_max=MAX16(sample_max, st->overlap_max); /* Find pitch period and gain */ { VARDECL(celt_sig, _pre); @@ -1093,13 +1214,17 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, *inp = tmp + st->preemph_memE[c]; st->preemph_memE[c] = MULT16_32_Q15(st->mode->preemph[1], *inp) - MULT16_32_Q15(st->mode->preemph[0], tmp); - silence = silence && *inp == 0; inp++; } OPUS_COPY(pre[c], prefilter_mem+c*COMBFILTER_MAXPERIOD, COMBFILTER_MAXPERIOD); OPUS_COPY(pre[c]+COMBFILTER_MAXPERIOD, in+c*(N+st->overlap)+st->overlap, N); } while (++c<CC); +#ifdef FIXED_POINT + silence = (sample_max==0); +#else + silence = (sample_max <= (opus_val16)1/(1<<st->lsb_depth)); +#endif #ifdef FUZZING if ((rand()&0x3F)==0) silence = 1; @@ -1129,8 +1254,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, ALLOC(pitch_buf, (COMBFILTER_MAXPERIOD+N)>>1, opus_val16); pitch_downsample(pre, pitch_buf, COMBFILTER_MAXPERIOD+N, CC); + /* Don't search for the fir last 1.5 octave of the range because + there's too many false-positives due to short-term correlation */ pitch_search(pitch_buf+(COMBFILTER_MAXPERIOD>>1), pitch_buf, N, - COMBFILTER_MAXPERIOD-COMBFILTER_MINPERIOD, &pitch_index); + COMBFILTER_MAXPERIOD-3*COMBFILTER_MINPERIOD, &pitch_index); pitch_index = COMBFILTER_MAXPERIOD-pitch_index; gain1 = remove_doubling(pitch_buf, COMBFILTER_MAXPERIOD, COMBFILTER_MINPERIOD, @@ 
-1138,6 +1265,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (pitch_index > COMBFILTER_MAXPERIOD-2) pitch_index = COMBFILTER_MAXPERIOD-2; gain1 = MULT16_16_Q15(QCONST16(.7f,15),gain1); + if ((gain1 > QCONST16(.4f,15) || st->prefilter_gain > QCONST16(.4f,15)) && st->analysis.tonality > .3 + && (pitch_index > 1.26*st->prefilter_period || pitch_index < .79*st->prefilter_period)) + pitch_change = 1; + /*printf("%d %d %f %f\n", pitch_change, pitch_index, gain1, st->analysis.tonality);*/ if (st->loss_rate>2) gain1 = HALF32(gain1); if (st->loss_rate>4) @@ -1236,7 +1367,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (st->complexity > 1) { isTransient = transient_analysis(in, N+st->overlap, CC, - st->overlap); + st->overlap, &tf_estimate, &tf_chan, &st->analysis); if (isTransient) shortBlocks = M; } @@ -1253,6 +1384,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, { for (i=0;i<N;i++) freq[i] = ADD32(HALF32(freq[i]), HALF32(freq[N+i])); + tf_chan = 0; } if (st->upsample != 1) { @@ -1265,17 +1397,53 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, freq[c*N+i] = 0; } while (++c<C); } - ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ - compute_band_energies(st->mode, freq, bandE, effEnd, C, M); amp2Log2(st->mode, effEnd, st->end, bandE, bandLogE, C); + /*for (i=0;i<21;i++) + printf("%f ", bandLogE[i]); + printf("\n");*/ + + ALLOC(bandLogE2, C*st->mode->nbEBands, opus_val16); + if (shortBlocks && st->complexity>=8) + { + VARDECL(celt_sig, freq2); + VARDECL(opus_val32, bandE2); + ALLOC(freq2, CC*N, celt_sig); + compute_mdcts(st->mode, 0, in, freq2, CC, LM); + if (CC==2&&C==1) + { + for (i=0;i<N;i++) + freq2[i] = ADD32(HALF32(freq2[i]), HALF32(freq2[N+i])); + } + if (st->upsample != 1) + { + c=0; do + { + int bound = N/st->upsample; + for (i=0;i<bound;i++) + freq2[c*N+i] *= st->upsample; + for (;i<N;i++) + 
freq2[c*N+i] = 0; + } while (++c<C); + } + ALLOC(bandE2, C*st->mode->nbEBands, opus_val32); + compute_band_energies(st->mode, freq2, bandE2, effEnd, C, M); + amp2Log2(st->mode, effEnd, st->end, bandE2, bandLogE2, C); + for (i=0;i<C*st->mode->nbEBands;i++) + bandLogE2[i] += HALF16(SHL16(LM, DB_SHIFT)); + } else { + for (i=0;i<C*st->mode->nbEBands;i++) + bandLogE2[i] = bandLogE[i]; + } + + ALLOC(X, C*N, celt_norm); /**< Interleaved normalised MDCTs */ /* Band normalisation */ normalise_bands(st->mode, freq, X, bandE, effEnd, C, M); ALLOC(tf_res, st->mode->nbEBands, int); - tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum); + tf_select = tf_analysis(st->mode, effEnd, C, isTransient, tf_res, effectiveBytes, X, N, LM, &tf_sum, tf_estimate, tf_chan); for (i=effEnd;i<st->end;i++) tf_res[i] = tf_res[effEnd-1]; @@ -1287,7 +1455,6 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, tf_encode(st->start, st->end, isTransient, tf_res, LM, tf_select, enc); - st->spread_decision = SPREAD_NORMAL; if (ec_tell(enc)+4<=total_bits) { if (shortBlocks || st->complexity < 3 || nbAvailableBytes < 10*C) @@ -1295,9 +1462,21 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, if (st->complexity == 0) st->spread_decision = SPREAD_NONE; } else { - st->spread_decision = spreading_decision(st->mode, X, - &st->tonal_average, st->spread_decision, &st->hf_average, - &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + if (st->analysis.valid) + { + static const opus_val16 spread_thresholds[3] = {-QCONST16(.6f, 15), -QCONST16(.2f, 15), -QCONST16(.07f, 15)}; + static const opus_val16 spread_histeresis[3] = {QCONST16(.15f, 15), QCONST16(.07f, 15), QCONST16(.02f, 15)}; + static const opus_val16 tapset_thresholds[2] = {QCONST16(.0f, 15), QCONST16(.15f, 15)}; + static const opus_val16 tapset_histeresis[2] = {QCONST16(.1f, 15), QCONST16(.05f, 15)}; + st->spread_decision = 
hysteresis_decision(-st->analysis.tonality, spread_thresholds, spread_histeresis, 3, st->spread_decision); + st->tapset_decision = hysteresis_decision(st->analysis.tonality_slope, tapset_thresholds, tapset_histeresis, 2, st->tapset_decision); + } else { + st->spread_decision = spreading_decision(st->mode, X, + &st->tonal_average, st->spread_decision, &st->hf_average, + &st->tapset_decision, pf_on&&!shortBlocks, effEnd, C, M); + } + /*printf("%d %d\n", st->tapset_decision, st->spread_decision);*/ + /*printf("%f %d %f %d\n\n", st->analysis.tonality, st->spread_decision, st->analysis.tonality_slope, st->tapset_decision);*/ } ec_enc_icdf(enc, st->spread_decision, spread_icdf, 5); } @@ -1309,38 +1488,95 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, for (i=0;i<st->mode->nbEBands;i++) offsets[i] = 0; /* Dynamic allocation code */ + maxDepth=-QCONST16(32.f, DB_SHIFT); /* Make sure that dynamic allocation can't make us bust the budget */ if (effectiveBytes > 50 && LM>=1) { - int t1, t2; - if (LM <= 1) + int last=0; + VARDECL(opus_val16, follower); + ALLOC(follower, C*st->mode->nbEBands, opus_val16); + c=0;do + { + follower[c*st->mode->nbEBands] = bandLogE2[c*st->mode->nbEBands]; + for (i=1;i<st->end;i++) + { + /* The last band to be at least 3 dB higher than the previous one + is the last we'll consider. Otherwise, we run into problems on + bandlimited signals. 
*/ + if (bandLogE2[c*st->mode->nbEBands+i] > bandLogE2[c*st->mode->nbEBands+i-1]+QCONST16(.5f,DB_SHIFT)) + last=i; + follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i-1]+QCONST16(1.5f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i]); + } + for (i=last-1;i>=0;i--) + follower[c*st->mode->nbEBands+i] = MIN16(follower[c*st->mode->nbEBands+i], MIN16(follower[c*st->mode->nbEBands+i+1]+QCONST16(2.f,DB_SHIFT), bandLogE2[c*st->mode->nbEBands+i])); + for (i=0;i<st->end;i++) + { + opus_val16 noise_floor; + /* Noise floor must take into account eMeans, the depth, the width of the bands + and the preemphasis filter (approx. square of bark band ID) */ + noise_floor = MULT16_16(QCONST16(0.0625f, DB_SHIFT),st->mode->logN[i]) + +QCONST16(.5f,DB_SHIFT)+SHL16(9-st->lsb_depth,DB_SHIFT)-SHL16(eMeans[i],6) + +MULT16_16(QCONST16(.0062,DB_SHIFT),(i+5)*(i+5)); + follower[c*st->mode->nbEBands+i] = MAX16(follower[c*st->mode->nbEBands+i], noise_floor); + maxDepth = MAX16(maxDepth, bandLogE[c*st->mode->nbEBands+i]-noise_floor); + } + } while (++c<C); + if (C==2) { - t1 = 3; - t2 = 5; + for (i=st->start;i<st->end;i++) + { + /* Consider 24 dB "cross-talk" */ + follower[st->mode->nbEBands+i] = MAX16(follower[st->mode->nbEBands+i], follower[ i]-QCONST16(4.f,DB_SHIFT)); + follower[ i] = MAX16(follower[ i], follower[st->mode->nbEBands+i]-QCONST16(4.f,DB_SHIFT)); + follower[i] = HALF16(MAX16(0, bandLogE[i]-follower[i]) + MAX16(0, bandLogE[st->mode->nbEBands+i]-follower[st->mode->nbEBands+i])); + } } else { - t1 = 2; - t2 = 4; + for (i=st->start;i<st->end;i++) + { + follower[i] = MAX16(0, bandLogE[i]-follower[i]); + } } - for (i=st->start+1;i<st->end-1;i++) + /* For non-transient CBR/CVBR frames, halve the dynalloc contribution */ + if ((!st->vbr || st->constrained_vbr)&&!isTransient) { - opus_val32 d2; - d2 = 2*bandLogE[i]-bandLogE[i-1]-bandLogE[i+1]; - if (C==2) - d2 = HALF32(d2 + 2*bandLogE[i+st->mode->nbEBands]- - 
bandLogE[i-1+st->mode->nbEBands]-bandLogE[i+1+st->mode->nbEBands]); -#ifdef FUZZING - if((rand()&0xF)==0) + for (i=st->start;i<st->end;i++) + follower[i] = HALF16(follower[i]); + } + for (i=st->start;i<st->end;i++) + { + int width; + int boost; + int boost_bits; + + if (i<8) + follower[i] *= 2; + if (i>=12) + follower[i] = HALF16(follower[i]); + follower[i] = MIN16(follower[i], QCONST16(4, DB_SHIFT)); + + /* FIXME: Adaptively reduce follower at low rate or for cbr/cvbr */ + width = C*(st->mode->eBands[i+1]-st->mode->eBands[i])<<LM; + if (width<6) { - offsets[i] += 1; - if((rand()&0x3)==0) - offsets[i] += 1+(rand()&0x3); + boost = SHR32(EXTEND32(follower[i]),DB_SHIFT); + boost_bits = boost*width<<BITRES; + } else if (width > 48) { + boost = SHR32(EXTEND32(follower[i])*8,DB_SHIFT); + boost_bits = (boost*width<<BITRES)/8; + } else { + boost = SHR32(EXTEND32(follower[i])*width/6,DB_SHIFT); + boost_bits = boost*6<<BITRES; + } + /* For CBR and non-transient CVBR frames, limit dynalloc to 1/4 of the bits */ + if ((!st->vbr || (st->constrained_vbr&&!isTransient)) + && (tot_boost+boost_bits)>>BITRES>>3 > effectiveBytes/4) + { + offsets[i] = 0; + break; + } else { + offsets[i] = boost; + tot_boost += boost_bits; } -#else - if (d2 > SHL16(t1,DB_SHIFT)) - offsets[i] += 1; - if (d2 > SHL16(t2,DB_SHIFT)) - offsets[i] += 1; -#endif } } dynalloc_logp = 6; @@ -1377,11 +1613,36 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, dynalloc_logp = IMAX(2, dynalloc_logp-1); offsets[i] = boost; } + + if (C==2) + { + int effectiveRate; + + static const opus_val16 intensity_thresholds[21]= + /* 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 off*/ + { 16,21,23,25,27,29,31,33,35,38,42,46,50,54,58,63,68,75,84,102,130}; + static const opus_val16 intensity_histeresis[21]= + { 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 3, 4, 5, 6, 8, 12}; + + /* Always use MS for 2.5 ms frames until we can do a better analysis */ + if (LM!=0) + dual_stereo = 
stereo_analysis(st->mode, X, LM, N); + + /* Account for coarse energy */ + effectiveRate = (8*effectiveBytes - 80)>>LM; + + /* effectiveRate in kb/s */ + effectiveRate = 2*effectiveRate/5; + + st->intensity = hysteresis_decision(effectiveRate, intensity_thresholds, intensity_histeresis, 21, st->intensity); + st->intensity = IMIN(st->end,IMAX(st->start, st->intensity)); + } + alloc_trim = 5; if (tell+(6<<BITRES) <= total_bits - total_boost) { alloc_trim = alloc_trim_analysis(st->mode, X, bandLogE, - st->end, LM, C, N); + st->end, LM, C, N, &st->analysis, &st->stereo_saving, tf_estimate, st->intensity); ec_enc_icdf(enc, alloc_trim, trim_icdf, 7); tell = ec_tell_frac(enc); } @@ -1392,28 +1653,96 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, opus_val16 alpha; opus_int32 delta; /* The target rate in 8th bits per frame */ - opus_int32 target; + opus_int32 target, base_target; opus_int32 min_allowed; + int coded_bins; + int coded_bands; int lm_diff = st->mode->maxLM - LM; + coded_bands = st->lastCodedBands ? st->lastCodedBands : st->mode->nbEBands; + coded_bins = st->mode->eBands[coded_bands]<<LM; + if (C==2) + coded_bins += st->mode->eBands[IMIN(st->intensity, coded_bands)]<<LM; /* Don't attempt to use more than 510 kb/s, even for frames smaller than 20 ms. The CELT allocator will just not be able to use more than that anyway. 
*/ nbCompressedBytes = IMIN(nbCompressedBytes,1275>>(3-LM)); - target = vbr_rate + (st->vbr_offset>>lm_diff) - ((40*C+20)<<BITRES); + target = vbr_rate - ((40*C+20)<<BITRES); + base_target = target; + + if (st->constrained_vbr) + target += (st->vbr_offset>>lm_diff); + + /*printf("%f %f %f %f %d %d ", st->analysis.activity, st->analysis.tonality, tf_estimate, st->stereo_saving, tot_boost, coded_bands);*/ +#ifndef FIXED_POINT + if (st->analysis.valid && st->analysis.activity<.4) + target -= (coded_bins<<BITRES)*1*(.4-st->analysis.activity); +#endif + /* Stereo savings */ + if (C==2) + { + int coded_stereo_bands; + int coded_stereo_dof; + coded_stereo_bands = IMIN(st->intensity, coded_bands); + coded_stereo_dof = (st->mode->eBands[coded_stereo_bands]<<LM)-coded_stereo_bands; + /*printf("%d %d %d ", coded_stereo_dof, coded_bins, tot_boost);*/ + target -= MIN32(target/3, SHR16(MULT16_16(st->stereo_saving,(coded_stereo_dof<<BITRES)),8)); + target += MULT16_16_Q15(QCONST16(0.035,15),coded_stereo_dof<<BITRES); + } + /* Limits starving of other bands when using dynalloc */ + target += tot_boost; + /* Compensates for the average transient boost */ + target = MULT16_32_Q15(QCONST16(0.96f,15),target); + /* Apply transient boost */ + target = SHL32(MULT16_32_Q15(tf_estimate, target),1); + +#ifndef FIXED_POINT + /* Apply tonality boost */ + if (st->analysis.valid) { + int tonal_target; + float tonal; + + /* Compensates for the average tonality boost */ + target -= MULT16_16_Q15(QCONST16(0.13f,15),coded_bins<<BITRES); + + tonal = MAX16(0,st->analysis.tonality-.2); + tonal_target = target + (coded_bins<<BITRES)*2.0f*tonal; + if (pitch_change) + tonal_target += (coded_bins<<BITRES)*.8; + /*printf("%f %f ", st->analysis.tonality, tonal);*/ + target = IMAX(tonal_target,target); + } +#endif - /* Shortblocks get a large boost in bitrate, but since they - are uncommon long blocks are not greatly affected */ - if (shortBlocks || tf_sum < -2*(st->end-st->start)) - target = 7*target/4; - 
else if (tf_sum < -(st->end-st->start)) - target = 3*target/2; - else if (M > 1) - target-=(target+14)/28; + { + opus_int32 floor_depth; + int bins; + bins = st->mode->eBands[st->mode->nbEBands-2]<<LM; + /*floor_depth = SHR32(MULT16_16((C*bins<<BITRES),celt_log2(SHL32(MAX16(1,sample_max),13))), DB_SHIFT);*/ + floor_depth = SHR32(MULT16_16((C*bins<<BITRES),maxDepth), DB_SHIFT); + floor_depth = IMAX(floor_depth, target>>2); + target = IMIN(target, floor_depth); + /*printf("%f %d\n", maxDepth, floor_depth);*/ + } + + if (st->constrained_vbr || st->bitrate<64000) + { + opus_val16 rate_factor; +#ifdef FIXED_POINT + rate_factor = MAX16(0,(st->bitrate-32000)); +#else + rate_factor = MAX16(0,(1.f/32768)*(st->bitrate-32000)); +#endif + if (st->constrained_vbr) + rate_factor = MIN16(rate_factor, QCONST16(0.67f, 15)); + target = base_target + MULT16_32_Q15(rate_factor, target-base_target); + + } + /* Don't allow more than doubling the rate */ + target = IMIN(2*base_target, target); /* The current offset is removed from the target and the space used so far is added*/ target=target+tell; - /* In VBR mode the frame size must not be reduced so much that it would result in the encoder running out of bits. 
The margin of 2 bytes ensures that none of the bust-prevention logic @@ -1453,8 +1782,11 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /*printf ("%d\n", st->vbr_reservoir);*/ /* Compute the offset we need to apply in order to reach the target */ - st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); - st->vbr_offset = -st->vbr_drift; + if (st->constrained_vbr) + { + st->vbr_drift += (opus_int32)MULT16_32_Q15(alpha,(delta*(1<<lm_diff))-st->vbr_offset-st->vbr_drift); + st->vbr_offset = -st->vbr_drift; + } /*printf ("%d\n", st->vbr_drift);*/ if (st->constrained_vbr && st->vbr_reservoir < 0) @@ -1467,38 +1799,10 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /*printf ("+%d\n", adjust);*/ } nbCompressedBytes = IMIN(nbCompressedBytes,nbAvailableBytes+nbFilledBytes); + /*printf("%d\n", nbCompressedBytes*50*8);*/ /* This moves the raw bits to take into account the new compressed size */ ec_enc_shrink(enc, nbCompressedBytes); } - if (C==2) - { - int effectiveRate; - - /* Always use MS for 2.5 ms frames until we can do a better analysis */ - if (LM!=0) - dual_stereo = stereo_analysis(st->mode, X, LM, N); - - /* Account for coarse energy */ - effectiveRate = (8*effectiveBytes - 80)>>LM; - - /* effectiveRate in kb/s */ - effectiveRate = 2*effectiveRate/5; - if (effectiveRate<35) - intensity = 8; - else if (effectiveRate<50) - intensity = 12; - else if (effectiveRate<68) - intensity = 16; - else if (effectiveRate<84) - intensity = 18; - else if (effectiveRate<102) - intensity = 19; - else if (effectiveRate<130) - intensity = 20; - else - intensity = 100; - intensity = IMIN(st->end,IMAX(st->start, intensity)); - } /* Bit allocation */ ALLOC(fine_quant, st->mode->nbEBands, int); @@ -1510,7 +1814,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, anti_collapse_rsv = isTransient&&LM>=2&&bits>=((LM+2)<<BITRES) ? 
(1<<BITRES) : 0; bits -= anti_collapse_rsv; codedBands = compute_allocation(st->mode, st->start, st->end, offsets, cap, - alloc_trim, &intensity, &dual_stereo, bits, &balance, pulses, + alloc_trim, &st->intensity, &dual_stereo, bits, &balance, pulses, fine_quant, fine_priority, C, LM, enc, 1, st->lastCodedBands); st->lastCodedBands = codedBands; @@ -1530,7 +1834,7 @@ int celt_encode_with_ec(CELTEncoder * OPUS_RESTRICT st, const opus_val16 * pcm, /* Residual quantisation */ ALLOC(collapse_masks, C*st->mode->nbEBands, unsigned char); quant_all_bands(1, st->mode, st->start, st->end, X, C==2 ? X+N : NULL, collapse_masks, - bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, intensity, tf_res, + bandE, pulses, shortBlocks, st->spread_decision, dual_stereo, st->intensity, tf_res, nbCompressedBytes*(8<<BITRES)-anti_collapse_rsv, balance, enc, LM, codedBands, &st->rng); if (anti_collapse_rsv > 0) @@ -1882,6 +2186,13 @@ int opus_custom_encoder_ctl(CELTEncoder * OPUS_RESTRICT st, int request, ...) 
st->signalling = value; } break; + case CELT_SET_ANALYSIS_REQUEST: + { + AnalysisInfo *info = va_arg(ap, AnalysisInfo *); + if (info) + OPUS_COPY(&st->analysis, info, 1); + } + break; case CELT_GET_MODE_REQUEST: { const CELTMode ** value = va_arg(ap, const CELTMode**); @@ -2869,6 +3180,7 @@ const char *opus_strerror(int error) const char *opus_get_version_string(void) { return "libopus " OPUS_VERSION + "-exp_analysis" #ifdef FIXED_POINT "-fixed" #endif diff --git a/celt/celt.h b/celt/celt.h index 218cd883..4c04ddba 100644 --- a/celt/celt.h +++ b/celt/celt.h @@ -50,7 +50,20 @@ extern "C" { #define CELTDecoder OpusCustomDecoder #define CELTMode OpusCustomMode -#define _celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) +typedef struct { + int valid; + opus_val16 tonality; + opus_val16 tonality_slope; + opus_val16 noisiness; + opus_val16 activity; + int boost_band[2]; + opus_val16 boost_amount[2]; + opus_val16 music_prob; +}AnalysisInfo; + +#define __celt_check_mode_ptr_ptr(ptr) ((ptr) + ((ptr) - (const CELTMode**)(ptr))) + +#define __celt_check_analysis_ptr(ptr) ((ptr) + ((ptr) - (const AnalysisInfo*)(ptr))) /* Encoder/decoder Requests */ @@ -81,11 +94,18 @@ extern "C" { #define CELT_GET_MODE_REQUEST 10015 /** Get the CELTMode used by an encoder or decoder */ -#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, _celt_check_mode_ptr_ptr(x) +#define CELT_GET_MODE(x) CELT_GET_MODE_REQUEST, __celt_check_mode_ptr_ptr(x) #define CELT_SET_SIGNALLING_REQUEST 10016 #define CELT_SET_SIGNALLING(x) CELT_SET_SIGNALLING_REQUEST, __opus_check_int(x) +#define CELT_SET_TONALITY_REQUEST 10018 +#define CELT_SET_TONALITY(x) CELT_SET_TONALITY_REQUEST, __opus_check_int(x) +#define CELT_SET_TONALITY_SLOPE_REQUEST 10020 +#define CELT_SET_TONALITY_SLOPE(x) CELT_SET_TONALITY_SLOPE_REQUEST, __opus_check_int(x) + +#define CELT_SET_ANALYSIS_REQUEST 10022 +#define CELT_SET_ANALYSIS(x) CELT_SET_ANALYSIS_REQUEST, __celt_check_analysis_ptr(x) /* Encoder stuff */ diff --git 
a/celt/mathops.h b/celt/mathops.h index 4e977956..3c7486ad 100644 --- a/celt/mathops.h +++ b/celt/mathops.h @@ -43,6 +43,33 @@ unsigned isqrt32(opus_uint32 _val); +#ifndef OVERRIDE_CELT_MAXABS16 +static inline opus_val16 celt_maxabs16(const opus_val16 *x, int len) +{ + int i; + opus_val16 maxval = 0; + for (i=0;i<len;i++) + maxval = MAX16(maxval, ABS16(x[i])); + return maxval; +} +#endif + +#ifndef OVERRIDE_CELT_MAXABS32 +#ifdef FIXED_POINT +static inline opus_val32 celt_maxabs32(const opus_val32 *x, int len) +{ + int i; + opus_val32 maxval = 0; + for (i=0;i<len;i++) + maxval = MAX32(maxval, ABS32(x[i])); + return maxval; +} +#else +#define celt_maxabs32(x,len) celt_maxabs16(x,len) +#endif +#endif + + #ifndef FIXED_POINT #define PI 3.141592653f @@ -117,27 +144,6 @@ static inline opus_int16 celt_ilog2(opus_int32 x) } #endif -#ifndef OVERRIDE_CELT_MAXABS16 -static inline opus_val16 celt_maxabs16(opus_val16 *x, int len) -{ - int i; - opus_val16 maxval = 0; - for (i=0;i<len;i++) - maxval = MAX16(maxval, ABS16(x[i])); - return maxval; -} -#endif - -#ifndef OVERRIDE_CELT_MAXABS32 -static inline opus_val32 celt_maxabs32(opus_val32 *x, int len) -{ - int i; - opus_val32 maxval = 0; - for (i=0;i<len;i++) - maxval = MAX32(maxval, ABS32(x[i])); - return maxval; -} -#endif /** Integer log in base2. 
Defined for zero, but not for negative numbers */ static inline opus_int16 celt_zlog2(opus_val32 x) diff --git a/celt/mdct.c b/celt/mdct.c index 16a36c69..b300b2b5 100644 --- a/celt/mdct.c +++ b/celt/mdct.c @@ -109,12 +109,14 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar int N, N2, N4; kiss_twiddle_scalar sine; VARDECL(kiss_fft_scalar, f); + VARDECL(kiss_fft_scalar, f2); SAVE_STACK; N = l->n; N >>= shift; N2 = N>>1; N4 = N>>2; ALLOC(f, N2, kiss_fft_scalar); + ALLOC(f2, N2, kiss_fft_scalar); /* sin(x) ~= x here */ #ifdef FIXED_POINT sine = TRIG_UPSCALE*(QCONST16(0.7853981f, 15)+N2)/N; @@ -180,12 +182,12 @@ void clt_mdct_forward(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_scalar } /* N/4 complex FFT, down-scales by 4/N */ - opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)in); + opus_fft(l->kfft[shift], (kiss_fft_cpx *)f, (kiss_fft_cpx *)f2); /* Post-rotate */ { /* Temp pointers to make it really clear to the compiler what we're doing */ - const kiss_fft_scalar * OPUS_RESTRICT fp = in; + const kiss_fft_scalar * OPUS_RESTRICT fp = f2; kiss_fft_scalar * OPUS_RESTRICT yp1 = out; kiss_fft_scalar * OPUS_RESTRICT yp2 = out+stride*(N2-1); const kiss_twiddle_scalar *t = &l->trig[0]; diff --git a/celt/pitch.c b/celt/pitch.c index c2f08ec1..d9bba1b2 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -331,6 +331,7 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int T1, T1b; opus_val16 g1; opus_val16 cont=0; + opus_val16 thresh; T1 = (2*T0+k)/(2*k); if (T1 < minperiod) break; @@ -372,7 +373,14 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, cont = HALF32(prev_gain); else cont = 0; - if (g1 > QCONST16(.3f,15) + MULT16_16_Q15(QCONST16(.4f,15),g0)-cont) + thresh = MAX16(QCONST16(.3f,15), MULT16_16_Q15(QCONST16(.7,15),g0)-cont); + /* Bias against very high pitch (very short period) to avoid false-positives + due to short-term correlation */ + if (T1<3*minperiod) + thresh = 
MAX16(QCONST16(.4f,15), MULT16_16_Q15(QCONST16(.85,15),g0)-cont); + if (T1<2*minperiod) /* not chained with else: T1<2*minperiod lies inside T1<3*minperiod, so an else-if here could never run */ + thresh = MAX16(QCONST16(.5f,15), MULT16_16_Q15(QCONST16(.9,15),g0)-cont); + if (g1 > thresh) { best_xy = xy; best_yy = yy; diff --git a/celt/quant_bands.c b/celt/quant_bands.c index b1d4eb15..241392f1 100644 --- a/celt/quant_bands.c +++ b/celt/quant_bands.c @@ -40,8 +40,8 @@ #include "rate.h" #ifdef FIXED_POINT -/* Mean energy in each band quantized in Q6 */ -static const signed char eMeans[25] = { +/* Mean energy in each band quantized in Q4 */ +const signed char eMeans[25] = { 103,100, 92, 85, 81, 77, 72, 70, 78, 75, 73, 71, 78, 74, 69, @@ -49,8 +49,8 @@ static const signed char eMeans[25] = { 60, 60, 60, 60, 60 }; #else -/* Mean energy in each band quantized in Q6 and converted back to float */ -static const opus_val16 eMeans[25] = { +/* Mean energy in each band quantized in Q4 and converted back to float */ +const opus_val16 eMeans[25] = { 6.437500f, 6.250000f, 5.750000f, 5.312500f, 5.062500f, 4.812500f, 4.500000f, 4.375000f, 4.875000f, 4.687500f, 4.562500f, 4.437500f, 4.875000f, 4.625000f, 4.312500f, diff --git a/celt/quant_bands.h b/celt/quant_bands.h index bec2855c..b3187fad 100644 --- a/celt/quant_bands.h +++ b/celt/quant_bands.h @@ -35,6 +35,12 @@ #include "entdec.h" #include "mathops.h" +#ifdef FIXED_POINT +extern const signed char eMeans[25]; +#else +extern const opus_val16 eMeans[25]; +#endif + void amp2Log2(const CELTMode *m, int effEnd, int end, celt_ener *bandE, opus_val16 *bandLogE, int C); diff --git a/opus_headers.mk b/opus_headers.mk index f160710c..43a978cd 100644 --- a/opus_headers.mk +++ b/opus_headers.mk @@ -1,4 +1,7 @@ OPUS_HEAD = \ include/opus.h \ include/opus_multistream.h \ -src/opus_private.h +src/opus_private.h \ +src/analysis.h \ +src/mlp.h \ +src/tansig_table.h diff --git a/opus_sources.mk b/opus_sources.mk index 384b036a..81eaef06 100644 --- a/opus_sources.mk +++ b/opus_sources.mk @@ -3,3 +3,8 @@ src/opus_decoder.c \ src/opus_encoder.c \ 
src/opus_multistream.c \ src/repacketizer.c + +OPUS_SOURCES_FLOAT = \ +src/analysis.c \ +src/mlp.c \ +src/mlp_data.c diff --git a/src/analysis.c b/src/analysis.c new file mode 100644 index 00000000..fde65493 --- /dev/null +++ b/src/analysis.c @@ -0,0 +1,460 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "kiss_fft.h" +#include "celt.h" +#include "modes.h" +#include "arch.h" +#include "quant_bands.h" +#include <stdio.h> +#include "analysis.h" +#include "mlp.h" + +extern const MLP net; + +#ifndef M_PI +#define M_PI 3.141592653 +#endif + +static const float dct_table[128] = { + 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, + 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, 0.250000, + 0.351851, 0.338330, 0.311806, 0.273300, 0.224292, 0.166664, 0.102631, 0.034654, + -0.034654, -0.102631, -0.166664, -0.224292, -0.273300, -0.311806, -0.338330, -0.351851, + 0.346760, 0.293969, 0.196424, 0.068975, -0.068975, -0.196424, -0.293969, -0.346760, + -0.346760, -0.293969, -0.196424, -0.068975, 0.068975, 0.196424, 0.293969, 0.346760, + 0.338330, 0.224292, 0.034654, -0.166664, -0.311806, -0.351851, -0.273300, -0.102631, + 0.102631, 0.273300, 0.351851, 0.311806, 0.166664, -0.034654, -0.224292, -0.338330, + 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, + 0.326641, 0.135299, -0.135299, -0.326641, -0.326641, -0.135299, 0.135299, 0.326641, + 0.311806, 0.034654, -0.273300, -0.338330, -0.102631, 0.224292, 0.351851, 0.166664, + -0.166664, -0.351851, -0.224292, 0.102631, 0.338330, 0.273300, -0.034654, -0.311806, + 0.293969, -0.068975, -0.346760, -0.196424, 0.196424, 0.346760, 0.068975, -0.293969, + -0.293969, 0.068975, 0.346760, 0.196424, -0.196424, -0.346760, -0.068975, 0.293969, + 0.273300, -0.166664, -0.338330, 0.034654, 0.351851, 0.102631, -0.311806, -0.224292, + 0.224292, 0.311806, -0.102631, -0.351851, -0.034654, 0.338330, 0.166664, -0.273300, +}; + +static const float analysis_window[240] = { + 0.000043f, 0.000171f, 0.000385f, 0.000685f, 0.001071f, 0.001541f, 0.002098f, 0.002739f, + 0.003466f, 0.004278f, 0.005174f, 0.006156f, 0.007222f, 0.008373f, 0.009607f, 0.010926f, + 0.012329f, 0.013815f, 0.015385f, 0.017037f, 0.018772f, 
0.020590f, 0.022490f, 0.024472f, + 0.026535f, 0.028679f, 0.030904f, 0.033210f, 0.035595f, 0.038060f, 0.040604f, 0.043227f, + 0.045928f, 0.048707f, 0.051564f, 0.054497f, 0.057506f, 0.060591f, 0.063752f, 0.066987f, + 0.070297f, 0.073680f, 0.077136f, 0.080665f, 0.084265f, 0.087937f, 0.091679f, 0.095492f, + 0.099373f, 0.103323f, 0.107342f, 0.111427f, 0.115579f, 0.119797f, 0.124080f, 0.128428f, + 0.132839f, 0.137313f, 0.141849f, 0.146447f, 0.151105f, 0.155823f, 0.160600f, 0.165435f, + 0.170327f, 0.175276f, 0.180280f, 0.185340f, 0.190453f, 0.195619f, 0.200838f, 0.206107f, + 0.211427f, 0.216797f, 0.222215f, 0.227680f, 0.233193f, 0.238751f, 0.244353f, 0.250000f, + 0.255689f, 0.261421f, 0.267193f, 0.273005f, 0.278856f, 0.284744f, 0.290670f, 0.296632f, + 0.302628f, 0.308658f, 0.314721f, 0.320816f, 0.326941f, 0.333097f, 0.339280f, 0.345492f, + 0.351729f, 0.357992f, 0.364280f, 0.370590f, 0.376923f, 0.383277f, 0.389651f, 0.396044f, + 0.402455f, 0.408882f, 0.415325f, 0.421783f, 0.428254f, 0.434737f, 0.441231f, 0.447736f, + 0.454249f, 0.460770f, 0.467298f, 0.473832f, 0.480370f, 0.486912f, 0.493455f, 0.500000f, + 0.506545f, 0.513088f, 0.519630f, 0.526168f, 0.532702f, 0.539230f, 0.545751f, 0.552264f, + 0.558769f, 0.565263f, 0.571746f, 0.578217f, 0.584675f, 0.591118f, 0.597545f, 0.603956f, + 0.610349f, 0.616723f, 0.623077f, 0.629410f, 0.635720f, 0.642008f, 0.648271f, 0.654508f, + 0.660720f, 0.666903f, 0.673059f, 0.679184f, 0.685279f, 0.691342f, 0.697372f, 0.703368f, + 0.709330f, 0.715256f, 0.721144f, 0.726995f, 0.732807f, 0.738579f, 0.744311f, 0.750000f, + 0.755647f, 0.761249f, 0.766807f, 0.772320f, 0.777785f, 0.783203f, 0.788573f, 0.793893f, + 0.799162f, 0.804381f, 0.809547f, 0.814660f, 0.819720f, 0.824724f, 0.829673f, 0.834565f, + 0.839400f, 0.844177f, 0.848895f, 0.853553f, 0.858151f, 0.862687f, 0.867161f, 0.871572f, + 0.875920f, 0.880203f, 0.884421f, 0.888573f, 0.892658f, 0.896677f, 0.900627f, 0.904508f, + 0.908321f, 0.912063f, 0.915735f, 0.919335f, 0.922864f, 0.926320f, 
0.929703f, 0.933013f, + 0.936248f, 0.939409f, 0.942494f, 0.945503f, 0.948436f, 0.951293f, 0.954072f, 0.956773f, + 0.959396f, 0.961940f, 0.964405f, 0.966790f, 0.969096f, 0.971321f, 0.973465f, 0.975528f, + 0.977510f, 0.979410f, 0.981228f, 0.982963f, 0.984615f, 0.986185f, 0.987671f, 0.989074f, + 0.990393f, 0.991627f, 0.992778f, 0.993844f, 0.994826f, 0.995722f, 0.996534f, 0.997261f, + 0.997902f, 0.998459f, 0.998929f, 0.999315f, 0.999615f, 0.999829f, 0.999957f, 1.000000f, +}; + +static const int tbands[NB_TBANDS+1] = { + 2, 4, 6, 8, 10, 12, 14, 16, 20, 24, 28, 32, 40, 48, 56, 68, 80, 96, 120 +}; + +/*static const float tweight[NB_TBANDS+1] = { + .3, .4, .5, .6, .7, .8, .9, 1., 1., 1., 1., 1., 1., 1., .8, .7, .6, .5 +};*/ + +#define NB_TONAL_SKIP_BANDS 9 + +#define cA 0.43157974f +#define cB 0.67848403f +#define cC 0.08595542f +#define cE (M_PI/2) +static inline float fast_atan2f(float y, float x) { + float x2, y2; + /* Should avoid underflow on the values we'll get */ + if (ABS16(x)+ABS16(y)<1e-9) + { + x*=1e12; + y*=1e12; + } + x2 = x*x; + y2 = y*y; + if(x2<y2){ + float den = (y2 + cB*x2) * (y2 + cC*x2); + if (den!=0) + return -x*y*(y2 + cA*x2) / den + copysignf(cE,y); + else + return copysignf(cE,y); + }else{ + float den = (x2 + cB*y2) * (x2 + cC*y2); + if (den!=0) + return x*y*(x2 + cA*y2) / den + copysignf(cE,y) - copysignf(cE,x*y); + else + return copysignf(cE,y) - copysignf(cE,x*y); + } +} + +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEncoder *celt_enc, const opus_val16 *x, int C) +{ + int i, b; + const CELTMode *mode; + const kiss_fft_state *kfft; + kiss_fft_cpx in[480], out[480]; + int N = 480, N2=240; + float * restrict A = tonal->angle; + float * restrict dA = tonal->d_angle; + float * restrict d2A = tonal->d2_angle; + float tonality[240]; + float noisiness[240]; + float band_tonality[NB_TBANDS]; + float logE[NB_TBANDS]; + float BFCC[8]; + float features[100]; + float frame_tonality; + float max_frame_tonality; + float 
tw_sum=0; + float frame_noisiness; + const float pi4 = M_PI*M_PI*M_PI*M_PI; + float slope=0; + float frame_stationarity; + float relativeE; + float frame_prob; + float alpha, alphaE, alphaE2; + float frame_loudness; + float bandwidth_mask; + int bandwidth=0; + float bandE[NB_TBANDS]; + celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); + + tonal->last_transition++; + alpha = 1.f/IMIN(20, 1+tonal->count); + alphaE = 1.f/IMIN(50, 1+tonal->count); + alphaE2 = 1.f/IMIN(6000, 1+tonal->count); + + if (tonal->count<4) + tonal->music_prob = .5; + kfft = mode->mdct.kfft[0]; + if (C==1) + { + for (i=0;i<N2;i++) + { + float w = analysis_window[i]; + in[i].r = MULT16_16(w, x[i]); + in[i].i = MULT16_16(w, x[N-N2+i]); + in[N-i-1].r = MULT16_16(w, x[N-i-1]); + in[N-i-1].i = MULT16_16(w, x[2*N-N2-i-1]); + } + } else { + for (i=0;i<N2;i++) + { + float w = analysis_window[i]; + in[i].r = MULT16_16(w, x[2*i]+x[2*i+1]); + in[i].i = MULT16_16(w, x[2*(N-N2+i)]+x[2*(N-N2+i)+1]); + in[N-i-1].r = MULT16_16(w, x[2*(N-i-1)]+x[2*(N-i-1)+1]); + in[N-i-1].i = MULT16_16(w, x[2*(2*N-N2-i-1)]+x[2*(2*N-N2-i-1)+1]); + } + } + opus_fft(kfft, in, out); + + for (i=1;i<N2;i++) + { + float X1r, X2r, X1i, X2i; + float angle, d_angle, d2_angle; + float angle2, d_angle2, d2_angle2; + float mod1, mod2, avg_mod; + X1r = out[i].r+out[N-i].r; + X1i = out[i].i-out[N-i].i; + X2r = out[i].i+out[N-i].i; + X2i = out[N-i].r-out[i].r; + + angle = (.5/M_PI)*fast_atan2f(X1i, X1r); + d_angle = angle - A[i]; + d2_angle = d_angle - dA[i]; + + angle2 = (.5/M_PI)*fast_atan2f(X2i, X2r); + d_angle2 = angle2 - angle; + d2_angle2 = d_angle2 - d_angle; + + mod1 = d2_angle - floor(.5+d2_angle); + noisiness[i] = fabs(mod1); + mod1 *= mod1; + mod1 *= mod1; + + mod2 = d2_angle2 - floor(.5+d2_angle2); + noisiness[i] += fabs(mod2); + mod2 *= mod2; + mod2 *= mod2; + + avg_mod = .25*(d2A[i]+2*mod1+mod2); + tonality[i] = 1./(1+40*16*pi4*avg_mod)-.015; + + A[i] = angle2; + dA[i] = d_angle2; + d2A[i] = mod2; + } + + frame_tonality = 0; + 
max_frame_tonality = 0; + tw_sum = 0; + info->activity = 0; + frame_noisiness = 0; + frame_stationarity = 0; + if (!tonal->count) + { + for (b=0;b<NB_TBANDS;b++) + { + tonal->lowE[b] = 1e10; + tonal->highE[b] = -1e10; + } + } + relativeE = 0; + info->boost_amount[0]=info->boost_amount[1]=0; + info->boost_band[0]=info->boost_band[1]=0; + frame_loudness = 0; + bandwidth_mask = 0; + for (b=0;b<NB_TBANDS;b++) + { + float E=0, tE=0, nE=0; + float L1, L2; + float stationarity; + for (i=tbands[b];i<tbands[b+1];i++) + { + float binE = out[i].r*out[i].r + out[N-i].r*out[N-i].r + + out[i].i*out[i].i + out[N-i].i*out[N-i].i; + E += binE; + tE += binE*tonality[i]; + nE += binE*2*(.5-noisiness[i]); + } + bandE[b] = E; + tonal->E[tonal->E_count][b] = E; + frame_noisiness += nE/(1e-15+E); + + frame_loudness += sqrt(E+1e-10); + /* Add a reasonable noise floor */ + tonal->meanE[b] = (1-alphaE2)*tonal->meanE[b] + alphaE2*E; + tonal->meanRE[b] = (1-alphaE2)*tonal->meanRE[b] + alphaE2*sqrt(E); + /* 13 dB slope for spreading function */ + bandwidth_mask = MAX32(.05*bandwidth_mask, E); + /* Checks if band looks like stationary noise or if it's below a (trivial) masking curve */ + if (tonal->meanRE[b]*tonal->meanRE[b] < tonal->meanE[b]*.95 && E>.1*bandwidth_mask) + bandwidth = b; + logE[b] = log(E+1e-10); + tonal->lowE[b] = MIN32(logE[b], tonal->lowE[b]+.01); + tonal->highE[b] = MAX32(logE[b], tonal->highE[b]-.1); + if (tonal->highE[b] < tonal->lowE[b]+1) + { + tonal->highE[b]+=.5; + tonal->lowE[b]-=.5; + } + relativeE += (logE[b]-tonal->lowE[b])/(EPSILON+tonal->highE[b]-tonal->lowE[b]); + + L1=L2=0; + for (i=0;i<NB_FRAMES;i++) + { + L1 += sqrt(tonal->E[i][b]); + L2 += tonal->E[i][b]; + } + + stationarity = MIN16(0.99,L1/sqrt(EPSILON+NB_FRAMES*L2)); + stationarity *= stationarity; + stationarity *= stationarity; + frame_stationarity += stationarity; + /*band_tonality[b] = tE/(1e-15+E)*/; + band_tonality[b] = MAX16(tE/(EPSILON+E), stationarity*tonal->prev_band_tonality[b]); +#if 0 + if 
(b>=NB_TONAL_SKIP_BANDS) + { + frame_tonality += tweight[b]*band_tonality[b]; + tw_sum += tweight[b]; + } +#else + frame_tonality += band_tonality[b]; + if (b>=NB_TBANDS-NB_TONAL_SKIP_BANDS) + frame_tonality -= band_tonality[b-NB_TBANDS+NB_TONAL_SKIP_BANDS]; +#endif + max_frame_tonality = MAX16(max_frame_tonality, (1+.03*(b-NB_TBANDS))*frame_tonality); + slope += band_tonality[b]*(b-8); + /*printf("%f %f ", band_tonality[b], stationarity);*/ + if (band_tonality[b] > info->boost_amount[1] && b>=7 && b < NB_TBANDS-1) + { + if (band_tonality[b] > info->boost_amount[0]) + { + info->boost_amount[1] = info->boost_amount[0]; + info->boost_band[1] = info->boost_band[0]; + info->boost_amount[0] = band_tonality[b]; + info->boost_band[0] = b; + } else { + info->boost_amount[1] = band_tonality[b]; + info->boost_band[1] = b; + } + } + tonal->prev_band_tonality[b] = band_tonality[b]; + } + + frame_loudness = 20*log10(frame_loudness); + tonal->Etracker = MAX32(tonal->Etracker-.03, frame_loudness); + tonal->lowECount *= (1-alphaE); + if (frame_loudness < tonal->Etracker-30) + tonal->lowECount += alphaE; + + for (i=0;i<8;i++) + { + float sum=0; + for (b=0;b<16;b++) + sum += dct_table[i*16+b]*logE[b]; + BFCC[i] = sum; + } + + frame_stationarity /= NB_TBANDS; + relativeE /= NB_TBANDS; + if (tonal->count<10) + relativeE = .5; + frame_noisiness /= NB_TBANDS; +#if 1 + info->activity = frame_noisiness + (1-frame_noisiness)*relativeE; +#else + info->activity = .5*(1+frame_noisiness-frame_stationarity); +#endif + frame_tonality = (max_frame_tonality/(NB_TBANDS-NB_TONAL_SKIP_BANDS)); + frame_tonality = MAX16(frame_tonality, tonal->prev_tonality*.8); + tonal->prev_tonality = frame_tonality; + info->boost_amount[0] -= frame_tonality+.2; + info->boost_amount[1] -= frame_tonality+.2; + if (info->boost_band[0]<1 || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]+1]+.15 + || band_tonality[info->boost_band[0]] < band_tonality[info->boost_band[0]-1]+.15) + info->boost_amount[0]=0; /* boost_band 0 means no band was selected in the loop above (only b>=7 is ever stored); the guard keeps band_tonality[-1] from being read */ + if 
(info->boost_band[1]<1 || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]+1]+.15 + || band_tonality[info->boost_band[1]] < band_tonality[info->boost_band[1]-1]+.15) + info->boost_amount[1]=0; /* same guard for the second candidate, which stays 0 when fewer than two bands qualified */ + + slope /= 8*8; + info->tonality_slope = slope; + + tonal->E_count = (tonal->E_count+1)%NB_FRAMES; + tonal->count++; + info->tonality = frame_tonality; + + for (i=0;i<4;i++) + features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i]; + + for (i=0;i<4;i++) + tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*BFCC[i]; + + for (i=0;i<4;i++) + features[4+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]); + for (i=0;i<3;i++) + features[8+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8]; + + if (tonal->count > 5) + { + for (i=0;i<9;i++) + tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[i]*features[i]; + } + + for (i=0;i<8;i++) + { + tonal->mem[i+24] = tonal->mem[i+16]; + tonal->mem[i+16] = tonal->mem[i+8]; + tonal->mem[i+8] = tonal->mem[i]; + tonal->mem[i] = BFCC[i]; + } + for (i=0;i<9;i++) + features[11+i] = sqrt(tonal->std[i]); + features[20] = info->tonality; + features[21] = info->activity; + features[22] = frame_stationarity; + features[23] = info->tonality_slope; + features[24] = tonal->lowECount; + +#ifndef FIXED_POINT + mlp_process(&net, features, &frame_prob); + frame_prob = .5*(frame_prob+1); + /* Curve fitting between the MLP probability and the actual probability */ + frame_prob = .01 + 1.21*frame_prob*frame_prob - .23*pow(frame_prob, 10); + + /*printf("%f\n", frame_prob);*/ + { + float tau, beta; + float p0, p1; + float max_certainty; + /* One transition every 3 minutes */ + tau = .00005; + beta = .1; + max_certainty = .01+1.f/(20+.5*tonal->last_transition); + p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; + p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; + 
p0 *= pow(1-frame_prob, beta); + p1 *= pow(frame_prob, beta); + tonal->music_prob = MAX16(max_certainty, MIN16(1-max_certainty, p1/(p0+p1))); + info->music_prob = tonal->music_prob; + /*printf("%f %f\n", frame_prob, info->music_prob);*/ + } + if (tonal->last_music != (tonal->music_prob>.5)) + tonal->last_transition=0; + tonal->last_music = tonal->music_prob>.5; +#else + info->music_prob = 0; +#endif + /*for (i=0;i<25;i++) + printf("%f ", features[i]); + printf("\n");*/ + + /* FIXME: Can't detect SWB for now because the last band ends at 12 kHz */ + if (bandwidth == NB_TBANDS-1 || tonal->count<100) + { + tonal->opus_bandwidth = OPUS_BANDWIDTH_FULLBAND; + } else { + int close_enough = 0; + if (bandwidth>0 && bandE[bandwidth-1] < 3000*bandE[NB_TBANDS-1] && bandwidth < NB_TBANDS-1) /* bandwidth is still 0 when no band passed the test in the band loop; bandE[-1] must not be read */ + close_enough=1; + if (bandwidth<=11 || (bandwidth==12 && close_enough)) + tonal->opus_bandwidth = OPUS_BANDWIDTH_NARROWBAND; + else if (bandwidth<=13) + tonal->opus_bandwidth = OPUS_BANDWIDTH_MEDIUMBAND; + else if (bandwidth<=15 || (bandwidth==16 && close_enough)) + tonal->opus_bandwidth = OPUS_BANDWIDTH_WIDEBAND; + } + info->noisiness = frame_noisiness; + info->valid = 1; +} diff --git a/src/analysis.h b/src/analysis.h new file mode 100644 index 00000000..09d1036a --- /dev/null +++ b/src/analysis.h @@ -0,0 +1,60 @@ +/* Copyright (c) 2011 Xiph.Org Foundation + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. 
+ + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef ANALYSIS_H +#define ANALYSIS_H + +#define NB_FRAMES 8 +#define NB_TBANDS 18 + + +typedef struct { + float angle[240]; + float d_angle[240]; + float d2_angle[240]; + float prev_band_tonality[NB_TBANDS]; + float prev_tonality; + float E[NB_FRAMES][NB_TBANDS]; + float lowE[NB_TBANDS], highE[NB_TBANDS]; + float meanE[NB_TBANDS], meanRE[NB_TBANDS]; + float mem[32]; + float cmean[8]; + float std[9]; + float music_prob; + float Etracker; + float lowECount; + int E_count; + int last_music; + int last_transition; + int count; + int opus_bandwidth; +} TonalityAnalysisState; + +void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, + CELTEncoder *celt_enc, const opus_val16 *x, int C); + +#endif diff --git a/src/mlp.c b/src/mlp.c new file mode 100644 index 00000000..dd3690db --- /dev/null +++ b/src/mlp.c @@ -0,0 +1,109 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. 
+ + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +#include <math.h> +#include "mlp.h" +#include "arch.h" +#include "tansig_table.h" +#define MAX_NEURONS 100 + +#ifdef FIXED_POINT +extern const opus_val16 tansig_table[501]; +static inline opus_val16 tansig_approx(opus_val32 _x) /* Q19 */ +{ + int i; + opus_val16 xx; /* Q11 */ + /*double x, y;*/ + opus_val16 dy, yy; /* Q14 */ + /*x = 1.9073e-06*_x;*/ + if (_x>=QCONST32(10,19)) + return QCONST32(1.,14); + if (_x<=-QCONST32(10,19)) + return -QCONST32(1.,14); + xx = EXTRACT16(SHR32(_x, 8)); + /*i = lrint(25*x);*/ + i = SHR32(ADD32(1024,MULT16_16(25, xx)),11); + /*x -= .04*i;*/ + xx -= EXTRACT16(SHR32(MULT16_16(20972,i),8)); + /*x = xx*(1./2048);*/ + /*y = tansig_table[250+i];*/ + yy = tansig_table[250+i]; + /*y = yy*(1./16384);*/ + dy = 16384-MULT16_16_Q14(yy,yy); + yy = yy + MULT16_16_Q14(MULT16_16_Q11(xx,dy),(16384 - MULT16_16_Q11(yy,xx))); + return yy; +} +#else +/*extern const float tansig_table[501];*/ +static inline double tansig_approx(double x) +{ + int i; + double y, dy; + double 
sign=1; + if (x>=8) + return 1; + if (x<=-8) + return -1; + if (x<0) + { + x=-x; + sign=-1; + } + i = lrint(25*x); + x -= .04*i; + y = tansig_table[i]; + dy = 1-y*y; + y = y + x*dy*(1 - y*x); + return sign*y; +} +#endif + +void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out) +{ + int j; + opus_val16 hidden[MAX_NEURONS]; + const opus_val16 *W = m->weights; + /* Copy to tmp_in */ + for (j=0;j<m->topo[1];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),8); + for (k=0;k<m->topo[0];k++) + sum = MAC16_16(sum, in[k],*W++); + hidden[j] = tansig_approx(sum); + } + for (j=0;j<m->topo[2];j++) + { + int k; + opus_val32 sum = SHL32(EXTEND32(*W++),14); + for (k=0;k<m->topo[1];k++) + sum = MAC16_16(sum, hidden[k], *W++); + out[j] = tansig_approx(EXTRACT16(PSHR32(sum,17))); + } +} + diff --git a/src/mlp.h b/src/mlp.h new file mode 100644 index 00000000..68ff68d8 --- /dev/null +++ b/src/mlp.h @@ -0,0 +1,41 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifndef _MLP_H_ +#define _MLP_H_ + +#include "arch.h" + +typedef struct { + int layers; + const int *topo; + const opus_val16 *weights; +} MLP; + +void mlp_process(const MLP *m, const opus_val16 *in, opus_val16 *out); + +#endif /* _MLP_H_ */ diff --git a/src/mlp_data.c b/src/mlp_data.c new file mode 100644 index 00000000..dbc7cec3 --- /dev/null +++ b/src/mlp_data.c @@ -0,0 +1,73 @@ +#include "mlp.h" + +/* RMS error was 0.179835, seed was 1322103961 */ + +static const float weights[271] = { + +/* hidden layer */ +1.55597, -0.0739792, -0.0646761, -0.099531, -0.0794943, +0.0180174, -0.0391354, 0.0508224, -0.0160169, -0.0773263, +-0.0300002, -0.0865361, 0.124477, -0.28648, -0.0860702, +-0.518949, -0.0873341, -0.235393, -0.907833, -0.383573, +0.535388, -0.57944, 0.98116, 0.8482, 1.12426, +-3.23721, -0.647072, -0.0265139, 0.0711052, -0.00125666, +-0.0396181, -0.44282, -0.510495, -0.201865, 0.0134336, +-0.167205, -0.155406, 0.00041678, -0.00468705, -0.0233224, +0.264279, -0.301375, 0.00234895, 0.0144741, -0.137535, +0.200323, 0.0192027, 3.19818, 2.03495, 0.705517, +-4.6025, -0.11485, -0.792716, 0.150714, 0.10608, +0.240633, 0.0690698, 0.0695297, 0.124819, 0.0501433, +0.0460952, 0.147639, 0.10327, 0.158007, 0.113714, +0.0276191, 0.0680749, -0.130012, 0.0796126, 0.133067, +0.51495, 0.747578, -0.128742, 5.98112, -1.16698, +-0.276492, -1.73549, -3.90234, 2.01489, -0.040118, +-0.113002, -0.146751, -0.113569, 0.0534873, 0.0989832, +0.0872875, 0.049266, 
0.0367557, -0.00889148, -0.0648461, +-0.00190352, 0.0143773, 0.0259364, -0.0592133, -0.0672924, +0.1399, -0.0987886, -0.347402, 0.101326, -0.0680876, +0.469186, 0.246922, 10.4017, 3.44846, -0.662725, +-0.0328208, -0.0561274, -0.0167744, 0.00044282, -0.0457645, +-0.0408314, -0.013113, -0.0373873, -0.0474122, -0.0273745, +-0.0308505, 0.000582959, -0.0421135, 0.464859, 0.196842, +0.320538, 0.0435528, -0.200168, 0.266475, -0.0853727, +1.20397, 0.711542, -1.04397, -1.47759, 1.26768, +0.446958, 0.266477, -0.30802, 0.28431, -0.118541, +0.00836345, 0.0689026, -0.0137996, -0.0395417, 0.26982, +-0.206255, 0.16066, 0.114757, 0.359587, -0.106503, +-0.0948534, 0.175358, -0.122966, -0.0056675, 0.483848, +-0.134916, -0.427567, -0.140172, -1.0866, -2.73921, +0.549843, 0.17685, 0.0010675, -0.00137386, 0.0884424, +-0.0698736, -0.00174136, 0.0718775, -0.0396849, 0.0448056, +0.0577853, -0.0372353, 0.134599, 0.0260656, 0.140322, +0.22704, -0.020568, -0.0142424, -0.21723, -0.997704, +-0.884573, -0.163495, 2.33617, 0.224142, 0.19635, +-0.957387, 0.144678, 1.47035, -0.00700498, -0.0472309, +-0.0137848, -0.0189145, 0.00856479, 0.0316965, 0.00613373, +0.00209807, 0.00270964, -0.0490206, 0.0105712, -0.0465045, +-0.0381532, -0.0985268, -0.108297, 0.0146409, -0.0040718, +-0.0698572, -0.380568, -0.230479, 3.98917, 0.457652, +-1.02355, -7.4435, -0.475314, 1.61743, 0.0254017, +-0.00791293, 0.047217, 0.0220995, -0.0304311, 0.0052168, +-0.0404054, -0.0230293, 0.00169229, -0.0138178, 0.0043137, +-0.0598088, -0.133601, 0.0555138, -0.177358, -0.159856, +-0.137281, 0.108051, -0.305973, 0.393775, 0.0747287, +0.783993, -0.875086, 1.06862, 0.340519, -0.352681, +-0.0830912, -0.100017, 0.0729085, -0.00829403, 0.027489, +-0.0779597, 0.082286, -0.164181, -0.41519, 0.00282335, +-0.29573, 0.125571, 0.726935, 0.392137, 0.491348, +0.0723196, -0.0259758, -0.0636332, -0.452384, -0.000225974, +-2.34001, 2.45211, -0.544628, 5.62944, -3.44507, + +/* output layer */ +-3.13835, 0.994751, 0.444901, 1.59518, 1.23665, 
+3.37012, -1.34606, 1.99131, 1.33476, 1.3885, +1.12559, }; + +static const int topo[3] = {25, 10, 1}; + +const MLP net = { + 3, + topo, + weights +}; + diff --git a/src/mlp_train.c b/src/mlp_train.c new file mode 100644 index 00000000..6421c17d --- /dev/null +++ b/src/mlp_train.c @@ -0,0 +1,496 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + + +#include "mlp_train.h" +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <semaphore.h> +#include <pthread.h> +#include <time.h> +#include <signal.h> + +int stopped = 0; + +void handler(int sig) +{ + stopped = 1; + signal(sig, handler); +} + +MLPTrain * mlp_init(int *topo, int nbLayers, float *inputs, float *outputs, int nbSamples) +{ + int i, j, k; + MLPTrain *net; + int inDim, outDim; + net = malloc(sizeof(*net)); + net->topo = malloc(nbLayers*sizeof(net->topo[0])); + for (i=0;i<nbLayers;i++) + net->topo[i] = topo[i]; + inDim = topo[0]; + outDim = topo[nbLayers-1]; + net->in_rate = malloc((inDim+1)*sizeof(net->in_rate[0])); + net->weights = malloc((nbLayers-1)*sizeof(net->weights)); + net->best_weights = malloc((nbLayers-1)*sizeof(net->weights)); + for (i=0;i<nbLayers-1;i++) + { + net->weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); + net->best_weights[i] = malloc((topo[i]+1)*topo[i+1]*sizeof(net->weights[0][0])); + } + double inMean[inDim]; + for (j=0;j<inDim;j++) + { + double std=0; + inMean[j] = 0; + for (i=0;i<nbSamples;i++) + { + inMean[j] += inputs[i*inDim+j]; + std += inputs[i*inDim+j]*inputs[i*inDim+j]; + } + inMean[j] /= nbSamples; + std /= nbSamples; + net->in_rate[1+j] = .5/(.0001+std); + std = std-inMean[j]*inMean[j]; + if (std<.001) + std = .001; + std = 1/sqrt(inDim*std); + for (k=0;k<topo[1];k++) + net->weights[0][k*(topo[0]+1)+j+1] = randn(std); + } + net->in_rate[0] = 1; + for (j=0;j<topo[1];j++) + { + double sum = 0; + for (k=0;k<inDim;k++) + sum += inMean[k]*net->weights[0][j*(topo[0]+1)+k+1]; + net->weights[0][j*(topo[0]+1)] = -sum; + } + for (j=0;j<outDim;j++) + { + double mean = 0; + double std; + for (i=0;i<nbSamples;i++) + mean += outputs[i*outDim+j]; + mean /= nbSamples; + std = 1/sqrt(topo[nbLayers-2]); + net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)] = mean; + for (k=0;k<topo[nbLayers-2];k++) + net->weights[nbLayers-2][j*(topo[nbLayers-2]+1)+k+1] = randn(std); + } + return net; +} + 
+#define MAX_NEURONS 100 + +double compute_gradient(MLPTrain *net, float *inputs, float *outputs, int nbSamples, double *W0_grad, double *W1_grad, double *error_rate) +{ + int i,j; + int s; + int inDim, outDim, hiddenDim; + int *topo; + double *W0, *W1; + double rms=0; + int W0_size, W1_size; + double hidden[MAX_NEURONS]; + double netOut[MAX_NEURONS]; + double error[MAX_NEURONS]; + + *error_rate = 0; + topo = net->topo; + inDim = net->topo[0]; + hiddenDim = net->topo[1]; + outDim = net->topo[2]; + W0_size = (topo[0]+1)*topo[1]; + W1_size = (topo[1]+1)*topo[2]; + W0 = net->weights[0]; + W1 = net->weights[1]; + memset(W0_grad, 0, W0_size*sizeof(double)); + memset(W1_grad, 0, W1_size*sizeof(double)); + for (i=0;i<outDim;i++) + netOut[i] = outputs[i]; + for (s=0;s<nbSamples;s++) + { + float *in, *out; + in = inputs+s*inDim; + out = outputs + s*outDim; + for (i=0;i<hiddenDim;i++) + { + double sum = W0[i*(inDim+1)]; + for (j=0;j<inDim;j++) + sum += W0[i*(inDim+1)+j+1]*in[j]; + hidden[i] = tansig_approx(sum); + } + for (i=0;i<outDim;i++) + { + double sum = W1[i*(hiddenDim+1)]; + for (j=0;j<hiddenDim;j++) + sum += W1[i*(hiddenDim+1)+j+1]*hidden[j]; + netOut[i] = tansig_approx(sum); + error[i] = out[i] - netOut[i]; + rms += error[i]*error[i]; + *error_rate += fabs(error[i])>1; + /*error[i] = error[i]/(1+fabs(error[i]));*/ + } + /* Back-propagate error */ + for (i=0;i<outDim;i++) + { + float grad = 1-netOut[i]*netOut[i]; + W1_grad[i*(hiddenDim+1)] += error[i]*grad; + for (j=0;j<hiddenDim;j++) + W1_grad[i*(hiddenDim+1)+j+1] += grad*error[i]*hidden[j]; + } + for (i=0;i<hiddenDim;i++) + { + double grad; + grad = 0; + for (j=0;j<outDim;j++) + grad += error[j]*W1[j*(hiddenDim+1)+i+1]; + grad *= 1-hidden[i]*hidden[i]; + W0_grad[i*(inDim+1)] += grad; + for (j=0;j<inDim;j++) + W0_grad[i*(inDim+1)+j+1] += grad*in[j]; + } + } + return rms; +} + +#define NB_THREADS 8 + +sem_t sem_begin[NB_THREADS]; +sem_t sem_end[NB_THREADS]; + +struct GradientArg { + int id; + int done; + MLPTrain 
*net; + float *inputs; + float *outputs; + int nbSamples; + double *W0_grad; + double *W1_grad; + double rms; + double error_rate; +}; + +void *gradient_thread_process(void *_arg) +{ + int W0_size, W1_size; + struct GradientArg *arg = _arg; + int *topo = arg->net->topo; + W0_size = (topo[0]+1)*topo[1]; + W1_size = (topo[1]+1)*topo[2]; + double W0_grad[W0_size]; + double W1_grad[W1_size]; + arg->W0_grad = W0_grad; + arg->W1_grad = W1_grad; + while (1) + { + sem_wait(&sem_begin[arg->id]); + if (arg->done) + break; + arg->rms = compute_gradient(arg->net, arg->inputs, arg->outputs, arg->nbSamples, arg->W0_grad, arg->W1_grad, &arg->error_rate); + sem_post(&sem_end[arg->id]); + } + fprintf(stderr, "done\n"); + return NULL; +} + +float mlp_train_backprop(MLPTrain *net, float *inputs, float *outputs, int nbSamples, int nbEpoch, float rate) +{ + int i, j; + int e; + float best_rms = 1e10; + int inDim, outDim, hiddenDim; + int *topo; + double *W0, *W1, *best_W0, *best_W1; + double *W0_old, *W1_old; + double *W0_old2, *W1_old2; + double *W0_grad, *W1_grad; + double *W0_oldgrad, *W1_oldgrad; + double *W0_rate, *W1_rate; + double *best_W0_rate, *best_W1_rate; + int W0_size, W1_size; + topo = net->topo; + W0_size = (topo[0]+1)*topo[1]; + W1_size = (topo[1]+1)*topo[2]; + struct GradientArg args[NB_THREADS]; + pthread_t thread[NB_THREADS]; + int samplePerPart = nbSamples/NB_THREADS; + int count_worse=0; + int count_retries=0; + + topo = net->topo; + inDim = net->topo[0]; + hiddenDim = net->topo[1]; + outDim = net->topo[2]; + W0 = net->weights[0]; + W1 = net->weights[1]; + best_W0 = net->best_weights[0]; + best_W1 = net->best_weights[1]; + W0_old = malloc(W0_size*sizeof(double)); + W1_old = malloc(W1_size*sizeof(double)); + W0_old2 = malloc(W0_size*sizeof(double)); + W1_old2 = malloc(W1_size*sizeof(double)); + W0_grad = malloc(W0_size*sizeof(double)); + W1_grad = malloc(W1_size*sizeof(double)); + W0_oldgrad = malloc(W0_size*sizeof(double)); + W1_oldgrad = 
malloc(W1_size*sizeof(double)); + W0_rate = malloc(W0_size*sizeof(double)); + W1_rate = malloc(W1_size*sizeof(double)); + best_W0_rate = malloc(W0_size*sizeof(double)); + best_W1_rate = malloc(W1_size*sizeof(double)); + memcpy(W0_old, W0, W0_size*sizeof(double)); + memcpy(W0_old2, W0, W0_size*sizeof(double)); + memset(W0_grad, 0, W0_size*sizeof(double)); + memset(W0_oldgrad, 0, W0_size*sizeof(double)); + memcpy(W1_old, W1, W1_size*sizeof(double)); + memcpy(W1_old2, W1, W1_size*sizeof(double)); + memset(W1_grad, 0, W1_size*sizeof(double)); + memset(W1_oldgrad, 0, W1_size*sizeof(double)); + + rate /= nbSamples; + for (i=0;i<hiddenDim;i++) + for (j=0;j<inDim+1;j++) + W0_rate[i*(inDim+1)+j] = rate*net->in_rate[j]; + for (i=0;i<W1_size;i++) + W1_rate[i] = rate; + + for (i=0;i<NB_THREADS;i++) + { + args[i].net = net; + args[i].inputs = inputs+i*samplePerPart*inDim; + args[i].outputs = outputs+i*samplePerPart*outDim; + args[i].nbSamples = samplePerPart; + args[i].id = i; + args[i].done = 0; + sem_init(&sem_begin[i], 0, 0); + sem_init(&sem_end[i], 0, 0); + pthread_create(&thread[i], NULL, gradient_thread_process, &args[i]); + } + for (e=0;e<nbEpoch;e++) + { + double rms=0; + double error_rate = 0; + for (i=0;i<NB_THREADS;i++) + { + sem_post(&sem_begin[i]); + } + memset(W0_grad, 0, W0_size*sizeof(double)); + memset(W1_grad, 0, W1_size*sizeof(double)); + for (i=0;i<NB_THREADS;i++) + { + sem_wait(&sem_end[i]); + rms += args[i].rms; + error_rate += args[i].error_rate; + for (j=0;j<W0_size;j++) + W0_grad[j] += args[i].W0_grad[j]; + for (j=0;j<W1_size;j++) + W1_grad[j] += args[i].W1_grad[j]; + } + + float mean_rate = 0, min_rate = 1e10; + rms = (rms/(outDim*nbSamples)); + error_rate = (error_rate/(outDim*nbSamples)); + fprintf (stderr, "%f (%f %f) ", error_rate, rms, best_rms); + if (rms < best_rms) + { + best_rms = rms; + for (i=0;i<W0_size;i++) + { + best_W0[i] = W0[i]; + best_W0_rate[i] = W0_rate[i]; + } + for (i=0;i<W1_size;i++) + { + best_W1[i] = W1[i]; + best_W1_rate[i] = 
W1_rate[i]; + } + count_worse=0; + count_retries=0; + } else { + count_worse++; + if (count_worse>30) + { + count_retries++; + count_worse=0; + for (i=0;i<W0_size;i++) + { + W0[i] = best_W0[i]; + best_W0_rate[i] *= .7; + if (best_W0_rate[i]<1e-15) best_W0_rate[i]=1e-15; + W0_rate[i] = best_W0_rate[i]; + W0_grad[i] = 0; + } + for (i=0;i<W1_size;i++) + { + W1[i] = best_W1[i]; + best_W1_rate[i] *= .8; + if (best_W1_rate[i]<1e-15) best_W1_rate[i]=1e-15; + W1_rate[i] = best_W1_rate[i]; + W1_grad[i] = 0; + } + } + } + if (count_retries>10) + break; + for (i=0;i<W0_size;i++) + { + if (W0_oldgrad[i]*W0_grad[i] > 0) + W0_rate[i] *= 1.01; + else if (W0_oldgrad[i]*W0_grad[i] < 0) + W0_rate[i] *= .9; + mean_rate += W0_rate[i]; + if (W0_rate[i] < min_rate) + min_rate = W0_rate[i]; + if (W0_rate[i] < 1e-15) + W0_rate[i] = 1e-15; + /*if (W0_rate[i] > .01) + W0_rate[i] = .01;*/ + W0_oldgrad[i] = W0_grad[i]; + W0_old2[i] = W0_old[i]; + W0_old[i] = W0[i]; + W0[i] += W0_grad[i]*W0_rate[i]; + } + for (i=0;i<W1_size;i++) + { + if (W1_oldgrad[i]*W1_grad[i] > 0) + W1_rate[i] *= 1.01; + else if (W1_oldgrad[i]*W1_grad[i] < 0) + W1_rate[i] *= .9; + mean_rate += W1_rate[i]; + if (W1_rate[i] < min_rate) + min_rate = W1_rate[i]; + if (W1_rate[i] < 1e-15) + W1_rate[i] = 1e-15; + W1_oldgrad[i] = W1_grad[i]; + W1_old2[i] = W1_old[i]; + W1_old[i] = W1[i]; + W1[i] += W1_grad[i]*W1_rate[i]; + } + mean_rate /= (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]; + fprintf (stderr, "%g %d", mean_rate, e); + if (count_retries) + fprintf(stderr, " %d", count_retries); + fprintf(stderr, "\n"); + if (stopped) + break; + } + for (i=0;i<NB_THREADS;i++) + { + args[i].done = 1; + sem_post(&sem_begin[i]); + pthread_join(thread[i], NULL); + fprintf (stderr, "joined %d\n", i); + } + free(W0_old); + free(W1_old); + free(W0_grad); + free(W1_grad); + free(W0_rate); + free(W1_rate); + return best_rms; +} + +int main(int argc, char **argv) +{ + int i, j; + int nbInputs; + int nbOutputs; + int nbHidden; + int nbSamples; + int 
nbEpoch; + int nbRealInputs; + unsigned int seed; + int ret; + float rms; + float *inputs; + float *outputs; + if (argc!=6) + { + fprintf (stderr, "usage: mlp_train <inputs> <hidden> <outputs> <nb samples> <nb epoch>\n"); + return 1; + } + nbInputs = atoi(argv[1]); + nbHidden = atoi(argv[2]); + nbOutputs = atoi(argv[3]); + nbSamples = atoi(argv[4]); + nbEpoch = atoi(argv[5]); + nbRealInputs = nbInputs; + inputs = malloc(nbInputs*nbSamples*sizeof(*inputs)); + outputs = malloc(nbOutputs*nbSamples*sizeof(*outputs)); + + seed = time(NULL); + fprintf (stderr, "Seed is %u\n", seed); + srand(seed); + build_tansig_table(); + signal(SIGTERM, handler); + signal(SIGINT, handler); + signal(SIGHUP, handler); + for (i=0;i<nbSamples;i++) + { + for (j=0;j<nbRealInputs;j++) + ret = scanf(" %f", &inputs[i*nbInputs+j]); + for (j=0;j<nbOutputs;j++) + ret = scanf(" %f", &outputs[i*nbOutputs+j]); + if (feof(stdin)) + { + nbSamples = i; + break; + } + } + int topo[3] = {nbInputs, nbHidden, nbOutputs}; + MLPTrain *net; + + fprintf (stderr, "Got %d samples\n", nbSamples); + net = mlp_init(topo, 3, inputs, outputs, nbSamples); + rms = mlp_train_backprop(net, inputs, outputs, nbSamples, nbEpoch, 1); + printf ("#include \"mlp.h\"\n\n"); + printf ("/* RMS error was %f, seed was %u */\n\n", rms, seed); + printf ("static const float weights[%d] = {\n", (topo[0]+1)*topo[1] + (topo[1]+1)*topo[2]); + printf ("\n/* hidden layer */\n"); + for (i=0;i<(topo[0]+1)*topo[1];i++) + { + printf ("%g, ", net->weights[0][i]); + if (i%5==4) + printf("\n"); + } + printf ("\n/* output layer */\n"); + for (i=0;i<(topo[1]+1)*topo[2];i++) + { + printf ("%g, ", net->weights[1][i]); + if (i%5==4) + printf("\n"); + } + printf ("};\n\n"); + printf ("static const int topo[3] = {%d, %d, %d};\n\n", topo[0], topo[1], topo[2]); + printf ("const MLP net = {\n"); + printf ("\t3,\n"); + printf ("\ttopo,\n"); + printf ("\tweights\n};\n"); + return 0; +} diff --git a/src/mlp_train.h b/src/mlp_train.h new file mode 100644 index 
00000000..1857f644 --- /dev/null +++ b/src/mlp_train.h @@ -0,0 +1,86 @@ +/* Copyright (c) 2008-2011 Octasic Inc. + Written by Jean-Marc Valin */ +/* + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + - Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + - Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR + CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+*/ + +#ifndef _MLP_TRAIN_H_ +#define _MLP_TRAIN_H_ + +#include <math.h> +#include <stdlib.h> + +double tansig_table[501]; +static inline double tansig_double(double x) +{ + return 2./(1.+exp(-2.*x)) - 1.; +} +static inline void build_tansig_table() +{ + int i; + for (i=0;i<501;i++) + tansig_table[i] = tansig_double(.04*(i-250)); +} + +static inline double tansig_approx(double x) +{ + int i; + double y, dy; + if (x>=10) + return 1; + if (x<=-10) + return -1; + i = lrint(25*x); + x -= .04*i; + y = tansig_table[250+i]; + dy = 1-y*y; + y = y + x*dy*(1 - y*x); + return y; +} + +inline float randn(float sd) +{ + float U1, U2, S, x; + do { + U1 = ((float)rand())/RAND_MAX; + U2 = ((float)rand())/RAND_MAX; + U1 = 2*U1-1; + U2 = 2*U2-1; + S = U1*U1 + U2*U2; + } while (S >= 1 || S == 0.0f); + x = sd*sqrt(-2 * log(S) / S) * U1; + return x; +} + + +typedef struct { + int layers; + int *topo; + double **weights; + double **best_weights; + double *in_rate; +} MLPTrain; + + +#endif /* _MLP_TRAIN_H_ */ diff --git a/src/opus_encoder.c b/src/opus_encoder.c index aae31256..4b6995d1 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -40,7 +40,8 @@ #include "arch.h" #include "opus_private.h" #include "os_support.h" - +#include "analysis.h" +#include "mathops.h" #include "tuning_parameters.h" #ifdef FIXED_POINT #include "fixed/structs_FIX.h" @@ -84,7 +85,9 @@ struct OpusEncoder { /* Sampling rate (at the API level) */ int first; opus_val16 delay_buffer[MAX_ENCODER_BUFFER*2]; - +#ifndef FIXED_POINT + TonalityAnalysisState analysis; +#endif opus_uint32 rangeFinal; }; @@ -365,6 +368,56 @@ static void hp_cutoff(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *ou #endif } +#ifdef FIXED_POINT +static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) +{ + int c, i; + int shift; + + /* Approximates -round(log2(4.*cutoff_Hz/Fs)) */ + shift=celt_ilog2(Fs/(cutoff_Hz*3)); + for (c=0;c<channels;c++) + 
{ + for (i=0;i<len;i++) + { + opus_val32 x, tmp, y; + x = SHL32(EXTEND32(in[channels*i+c]), 15); + /* First stage */ + tmp = x-hp_mem[2*c]; + hp_mem[2*c] = hp_mem[2*c] + PSHR32(x - hp_mem[2*c], shift); + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + PSHR32(tmp - hp_mem[2*c+1], shift); + out[channels*i+c] = EXTRACT16(SATURATE(PSHR32(y, 15), 32767)); + } + } +} + +#else +static void dc_reject(const opus_val16 *in, opus_int32 cutoff_Hz, opus_val16 *out, opus_val32 *hp_mem, int len, int channels, opus_int32 Fs) +{ + int c, i; + float coef; + + coef = 4.*cutoff_Hz/Fs; + for (c=0;c<channels;c++) + { + for (i=0;i<len;i++) + { + opus_val32 x, tmp, y; + x = in[channels*i+c]; + /* First stage */ + tmp = x-hp_mem[2*c]; + hp_mem[2*c] = hp_mem[2*c] + coef*(x - hp_mem[2*c]); + /* Second stage */ + y = tmp - hp_mem[2*c+1]; + hp_mem[2*c+1] = hp_mem[2*c+1] + coef*(tmp - hp_mem[2*c+1]); + out[channels*i+c] = y; + } + } +} +#endif + static void stereo_fade(const opus_val16 *in, opus_val16 *out, opus_val16 g1, opus_val16 g2, int overlap48, int frame_size, int channels, const opus_val16 *window, opus_int32 Fs) { @@ -472,6 +525,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s opus_int32 max_rate; /* Max bitrate we're allowed to use */ int curr_bandwidth; opus_int32 max_data_bytes; /* Max number of bytes we're allowed to use */ + int extra_buffer, total_buffer; + int perform_analysis=0; +#ifndef FIXED_POINT + AnalysisInfo analysis_info; +#endif VARDECL(opus_val16, tmp_prefill); ALLOC_STACK; @@ -493,11 +551,20 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s silk_enc = (char*)st+st->silk_enc_offset; celt_enc = (CELTEncoder*)((char*)st+st->celt_enc_offset); +#ifndef FIXED_POINT + perform_analysis = st->silk_mode.complexity >= 7 && frame_size >= st->Fs/100 && st->Fs==48000; +#endif if (st->application == OPUS_APPLICATION_RESTRICTED_LOWDELAY) delay_compensation = 0; else 
delay_compensation = st->delay_compensation; - + if (perform_analysis) + { + total_buffer = IMAX(st->Fs/200, delay_compensation); + } else { + total_buffer = delay_compensation; + } + extra_buffer = total_buffer-delay_compensation; st->bitrate_bps = user_bitrate_to_bitrate(st, frame_size, max_data_bytes); frame_rate = st->Fs/frame_size; @@ -839,9 +906,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s ec_enc_init(&enc, data, max_data_bytes-1); - ALLOC(pcm_buf, (delay_compensation+frame_size)*st->channels, opus_val16); - for (i=0;i<delay_compensation*st->channels;i++) - pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-delay_compensation)*st->channels+i]; + ALLOC(pcm_buf, (total_buffer+frame_size)*st->channels, opus_val16); + for (i=0;i<total_buffer*st->channels;i++) + pcm_buf[i] = st->delay_buffer[(st->encoder_buffer-total_buffer)*st->channels+i]; if (st->mode == MODE_CELT_ONLY) hp_freq_smth1 = silk_LSHIFT( silk_lin2log( VARIABLE_HP_MIN_CUTOFF_HZ ), 8 ); @@ -856,12 +923,26 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s if (st->application == OPUS_APPLICATION_VOIP) { - hp_cutoff(pcm, cutoff_Hz, &pcm_buf[delay_compensation*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); + hp_cutoff(pcm, cutoff_Hz, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } else { - for (i=0;i<frame_size*st->channels;i++) - pcm_buf[delay_compensation*st->channels + i] = pcm[i]; + dc_reject(pcm, 3, &pcm_buf[total_buffer*st->channels], st->hp_mem, frame_size, st->channels, st->Fs); } +#ifndef FIXED_POINT + if (perform_analysis) + { + int nb_analysis_frames; + nb_analysis_frames = frame_size/(st->Fs/100); + for (i=0;i<nb_analysis_frames;i++) + tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels); + if (st->signal_type == OPUS_AUTO) + st->voice_ratio = floor(.5+100*(1-analysis_info.music_prob)); + } else { + 
analysis_info.valid = 0; + st->voice_ratio = -1; + } +#endif + /* SILK processing */ if (st->mode != MODE_CELT_ONLY) { @@ -965,10 +1046,10 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s } #ifdef FIXED_POINT - pcm_silk = pcm_buf+delay_compensation*st->channels; + pcm_silk = pcm_buf+total_buffer*st->channels; #else for (i=0;i<frame_size*st->channels;i++) - pcm_silk[i] = FLOAT2INT16(pcm_buf[delay_compensation*st->channels + i]); + pcm_silk[i] = FLOAT2INT16(pcm_buf[total_buffer*st->channels + i]); #endif ret = silk_Encode( silk_enc, &st->silk_mode, pcm_silk, frame_size, &enc, &nBytes, 0 ); if( ret ) { @@ -1070,13 +1151,13 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s if (st->mode != MODE_SILK_ONLY && st->mode != st->prev_mode && st->prev_mode > 0) { for (i=0;i<st->channels*st->Fs/400;i++) - tmp_prefill[i] = st->delay_buffer[(st->encoder_buffer-st->delay_compensation-st->Fs/400)*st->channels + i]; + tmp_prefill[i] = st->delay_buffer[(extra_buffer+st->encoder_buffer-total_buffer-st->Fs/400)*st->channels + i]; } - for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+delay_compensation));i++) + for (i=0;i<st->channels*(st->encoder_buffer-(frame_size+total_buffer));i++) st->delay_buffer[i] = st->delay_buffer[i+st->channels*frame_size]; for (;i<st->encoder_buffer*st->channels;i++) - st->delay_buffer[i] = pcm_buf[(frame_size+delay_compensation-st->encoder_buffer)*st->channels+i]; + st->delay_buffer[i] = pcm_buf[(frame_size+total_buffer-st->encoder_buffer)*st->channels+i]; if (st->mode != MODE_HYBRID || st->stream_channels==1) @@ -1097,7 +1178,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s g1 *= (1.f/16384); g2 *= (1.f/16384); #endif - stereo_fade(pcm_buf, pcm_buf, g1, g2, celt_mode->overlap, + stereo_fade(pcm_buf+extra_buffer*st->channels, pcm_buf+extra_buffer*st->channels, g1, g2, celt_mode->overlap, frame_size, st->channels, celt_mode->window, st->Fs); 
st->hybrid_stereo_width_Q14 = st->silk_mode.stereoWidth_Q14; } @@ -1151,7 +1232,7 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s int err; celt_encoder_ctl(celt_enc, CELT_SET_START_BAND(0)); celt_encoder_ctl(celt_enc, OPUS_SET_VBR(0)); - err = celt_encode_with_ec(celt_enc, pcm_buf, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); + err = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, st->Fs/200, data+nb_compr_bytes, redundancy_bytes, NULL); if (err < 0) { RESTORE_STACK; @@ -1177,7 +1258,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s /* If false, we already busted the budget and we'll end up with a "PLC packet" */ if (ec_tell(&enc) <= 8*nb_compr_bytes) { - ret = celt_encode_with_ec(celt_enc, pcm_buf, frame_size, NULL, nb_compr_bytes, &enc); +#ifndef FIXED_POINT + if (perform_analysis) + celt_encoder_ctl(celt_enc, CELT_SET_ANALYSIS(&analysis_info)); +#endif + ret = celt_encode_with_ec(celt_enc, pcm_buf+extra_buffer*st->channels, frame_size, NULL, nb_compr_bytes, &enc); if (ret < 0) { RESTORE_STACK; @@ -1200,9 +1285,9 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s celt_encoder_ctl(celt_enc, CELT_SET_PREDICTION(0)); /* NOTE: We could speed this up slightly (at the expense of code size) by just adding a function that prefills the buffer */ - celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2-N4), N4, dummy, 2, NULL); + celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2-N4), N4, dummy, 2, NULL); - err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); + err = celt_encode_with_ec(celt_enc, pcm_buf+st->channels*(extra_buffer+frame_size-N2), N2, data+nb_compr_bytes, redundancy_bytes, NULL); if (err < 0) { RESTORE_STACK; diff --git a/src/tansig_table.h b/src/tansig_table.h new file mode 100644 index 00000000..a5aba230 --- 
/dev/null +++ b/src/tansig_table.h @@ -0,0 +1,45 @@ +/* This file is auto-generated by gen_tables */ + +static const opus_val16 tansig_table[201] = { +0.000000, 0.039979, 0.079830, 0.119427, 0.158649, +0.197375, 0.235496, 0.272905, 0.309507, 0.345214, +0.379949, 0.413644, 0.446244, 0.477700, 0.507977, +0.537050, 0.564900, 0.591519, 0.616909, 0.641077, +0.664037, 0.685809, 0.706419, 0.725897, 0.744277, +0.761594, 0.777888, 0.793199, 0.807569, 0.821040, +0.833655, 0.845456, 0.856485, 0.866784, 0.876393, +0.885352, 0.893698, 0.901468, 0.908698, 0.915420, +0.921669, 0.927473, 0.932862, 0.937863, 0.942503, +0.946806, 0.950795, 0.954492, 0.957917, 0.961090, +0.964028, 0.966747, 0.969265, 0.971594, 0.973749, +0.975743, 0.977587, 0.979293, 0.980869, 0.982327, +0.983675, 0.984921, 0.986072, 0.987136, 0.988119, +0.989027, 0.989867, 0.990642, 0.991359, 0.992020, +0.992631, 0.993196, 0.993718, 0.994199, 0.994644, +0.995055, 0.995434, 0.995784, 0.996108, 0.996407, +0.996682, 0.996937, 0.997172, 0.997389, 0.997590, +0.997775, 0.997946, 0.998104, 0.998249, 0.998384, +0.998508, 0.998623, 0.998728, 0.998826, 0.998916, +0.999000, 0.999076, 0.999147, 0.999213, 0.999273, +0.999329, 0.999381, 0.999428, 0.999472, 0.999513, +0.999550, 0.999585, 0.999617, 0.999646, 0.999673, +0.999699, 0.999722, 0.999743, 0.999763, 0.999781, +0.999798, 0.999813, 0.999828, 0.999841, 0.999853, +0.999865, 0.999875, 0.999885, 0.999893, 0.999902, +0.999909, 0.999916, 0.999923, 0.999929, 0.999934, +0.999939, 0.999944, 0.999948, 0.999952, 0.999956, +0.999959, 0.999962, 0.999965, 0.999968, 0.999970, +0.999973, 0.999975, 0.999977, 0.999978, 0.999980, +0.999982, 0.999983, 0.999984, 0.999986, 0.999987, +0.999988, 0.999989, 0.999990, 0.999990, 0.999991, +0.999992, 0.999992, 0.999993, 0.999994, 0.999994, +0.999994, 0.999995, 0.999995, 0.999996, 0.999996, +0.999996, 0.999997, 0.999997, 0.999997, 0.999997, +0.999997, 0.999998, 0.999998, 0.999998, 0.999998, +0.999998, 0.999998, 0.999999, 0.999999, 0.999999, +0.999999, 
0.999999, 0.999999, 0.999999, 0.999999, +0.999999, 0.999999, 0.999999, 0.999999, 0.999999, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, 1.000000, 1.000000, 1.000000, 1.000000, +1.000000, +}; |