From d0752af321245d8c64a502b695992cb83240df4f Mon Sep 17 00:00:00 2001 From: Jean-Marc Valin Date: Tue, 22 Nov 2011 22:44:56 -0500 Subject: Adds MFCC standard deviation features --- src/analysis.c | 37 ++++++++++++++----- src/mlp_data.c | 104 +++++++++++++++++++++++++++++++---------------------- src/opus_encoder.c | 3 ++ 3 files changed, 93 insertions(+), 51 deletions(-) diff --git a/src/analysis.c b/src/analysis.c index 7fd4b19c..e55d68a0 100644 --- a/src/analysis.c +++ b/src/analysis.c @@ -82,6 +82,8 @@ typedef struct { float lowE[NB_TBANDS], highE[NB_TBANDS]; float mem[32]; float cmean[8]; + float std[9]; + float music_prob; int E_count; int count; } TonalityAnalysisState; @@ -109,8 +111,13 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc float frame_stationarity; float relativeE; float frame_prob; + float alpha; celt_encoder_ctl(celt_enc, CELT_GET_MODE(&mode)); + alpha = 1.f/IMIN(20, 1+tonal->count); + + if (tonal->count<4) + tonal->music_prob = .5; kfft = mode->mdct.kfft[0]; if (C==1) { @@ -283,14 +290,21 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc for (i=0;i<5;i++) features[i] = -0.12299*(BFCC[i]+tonal->mem[i+24]) + 0.49195*(tonal->mem[i]+tonal->mem[i+16]) + 0.69693*tonal->mem[i+8] - 1.4349*tonal->cmean[i]; + for (i=0;i<5;i++) - tonal->cmean[i] = .95*tonal->cmean[i] + .05*BFCC[i]; + tonal->cmean[i] = (1-alpha)*tonal->cmean[i] + alpha*(i==0)*BFCC[i]; for (i=0;i<5;i++) features[5+i] = 0.63246*(BFCC[i]-tonal->mem[i+24]) + 0.31623*(tonal->mem[i]-tonal->mem[i+16]); for (i=0;i<4;i++) features[10+i] = 0.53452*(BFCC[i]+tonal->mem[i+24]) - 0.26726*(tonal->mem[i]+tonal->mem[i+16]) -0.53452*tonal->mem[i+8]; + if (tonal->count > 5) + { + for (i=0;i<9;i++) + tonal->std[i] = (1-alpha)*tonal->std[i] + alpha*features[5+i]*features[5+i]; + } + for (i=0;i<8;i++) { tonal->mem[i+24] = tonal->mem[i+16]; @@ -301,28 +315,33 @@ void tonality_analysis(TonalityAnalysisState *tonal, AnalysisInfo *info, CELTEnc 
features[14] = info->tonality; features[15] = info->activity; features[16] = frame_stationarity; + features[17] = info->tonality_slope; + for (i=0;i<9;i++) + features[18+i] = sqrt(tonal->std[i]); #ifndef FIXED_POINT mlp_process(&net, features, &frame_prob); frame_prob = .5*(frame_prob+1); + frame_prob = MAX16(.01f, MIN16(0.99f, frame_prob)); /*frame_prob = .45*frame_prob + .55*frame_prob*frame_prob*frame_prob;*/ /*printf("%f\n", frame_prob);*/ { - float alpha, beta; + float tau, beta; float p0, p1; - alpha = .01; - beta = .2; - p0 = (1-info->music_prob)*(1-alpha) + info->music_prob *alpha; - p1 = info->music_prob *(1-alpha) + (1-info->music_prob)*alpha; + tau = .0001; + beta = .1; + p0 = (1-tonal->music_prob)*(1-tau) + tonal->music_prob *tau; + p1 = tonal->music_prob *(1-tau) + (1-tonal->music_prob)*tau; p0 *= pow(1-frame_prob, beta); p1 *= pow(frame_prob, beta); - info->music_prob = p1/(p0+p1); - /*printf("%f\n", info->music_prob);*/ + tonal->music_prob = MAX16(0.01f, MIN16(0.99f, p1/(p0+p1))); + info->music_prob = tonal->music_prob; + /*printf("%f %f\n", frame_prob, info->music_prob);*/ } #else info->music_prob = 0; #endif - /*for (i=0;i<17;i++) + /*for (i=0;i<27;i++) printf("%f ", features[i]); printf("\n");*/ diff --git a/src/mlp_data.c b/src/mlp_data.c index c40c65e5..3f65398a 100644 --- a/src/mlp_data.c +++ b/src/mlp_data.c @@ -1,53 +1,73 @@ #include "mlp.h" -/* RMS error was 0.289869, seed was 1321927439 */ +/* RMS error was 0.082498, seed was 1321973245 */ -static const float weights[191] = { +static const float weights[291] = { /* hidden layer */ -2.96755, 0.00115262, -0.241332, -0.176303, -0.23881, --0.104982, 0.0976941, 0.0866153, 0.0445939, 0.109937, -0.0233463, 0.0474137, -0.214098, -0.104922, -0.182143, --8.4938, -6.0075, -0.521072, 0.0844896, -0.017247, --0.00201771, 0.102053, -0.000613734, -0.0365414, -0.313326, --0.388116, 0.195686, 0.415635, 0.139734, -0.375908, --0.495817, 0.197472, 0.108982, 15.8272, 1.39337, -0.295248, 0.478087, -0.521584, 
0.547038, -0.970695, -0.410447, -0.0398747, -0.157433, 0.225191, -0.159447, -0.0773803, -0.14035, -0.432067, 0.436629, -0.81, -0.258396, -1.35361, 0.379898, 0.0501006, 5.69164, -0.0244047, -0.0253634, 0.0778099, -0.00686666, 0.0670103, -0.131673, -0.0865675, -0.088408, -0.0215524, -0.105515, -0.130154, -0.0107951, 0.0149045, -0.00721336, 2.70627, --5.84219, 0.845236, 2.9728, 0.0480452, 0.0522916, --0.17107, -0.844511, 0.086013, 0.0808069, 0.0362425, -0.104797, 0.0312275, 0.100703, 0.0868895, 0.107739, --0.155109, -0.743343, 2.12173, -3.50347, 3.38095, --4.60509, -0.0940445, 0.133728, -0.0279815, 0.072341, -0.0587296, -0.490762, -0.68488, -0.171973, -0.0674625, -0.0557464, -0.000785266, 0.326857, -0.109421, 0.0148745, --22.8631, 4.74747, -0.927737, -0.125692, -0.484348, -0.448016, -0.858588, 0.36091, -0.0261568, -0.193647, -0.224419, -0.156897, 0.0704276, -0.133405, -0.420752, -0.374365, -0.718979, 0.213614, -0.00984738, 1.75345, --0.739806, 1.5547, 0.23016, -0.314378, -0.221868, -0.444039, -0.287516, 0.0769676, 0.025709, 0.0960222, --0.0841409, 0.188217, 0.311774, -0.269616, -0.237803, -0.318714, 5.50044, -3.76367, 5.06448, 0.592319, --3.03044, 3.38612, -4.38443, 1.30165, -0.804144, --0.531203, 0.605657, -0.43792, 0.352739, -0.0578825, --2.29906, 2.33548, -2.93828, 0.74198, 21.5563, -5.77912, -11.0732, 0.552401, -0.274121, -0.615635, --0.142968, -0.201479, -0.0541993, 0.0475207, 0.222928, --0.0327647, -0.0123197, -0.00380516, -0.149003, -0.313818, --0.137811, -0.181652, 1.23463, 2.17364, 0.229491, +1.98961, -0.0130782, 0.00232344, 0.034999, -0.111098, +-0.0306255, 5.99275e-05, 0.0279719, -0.0122697, -0.0743631, +-0.0265766, -0.0475938, -0.0358393, 0.0266045, -0.118931, +2.27785, 3.14688, 0.407808, 1.40886, -0.0050141, +-0.289166, -0.507755, -0.699676, -1.15413, 0.00851358, +0.252532, -0.36482, -0.408518, 5.64382, 0.0997894, +-0.0181532, 0.026249, 0.00308319, 0.0723668, 0.0562646, +0.00452278, 0.083442, 0.0495319, 0.00983553, 0.136991, +-0.0351484, 0.0259153, 
0.00614155, 1.19176, -2.81638, +-0.514078, 0.881382, -0.257151, -0.486597, -0.187349, +-1.26923, -0.464027, 0.181971, 0.485618, 0.0691355, +1.36658, -0.173798, -0.0413093, -0.265832, -1.01119, +-0.0471417, 0.201685, -0.132444, -0.0361785, -0.0858083, +0.0962927, -0.0848372, -0.152459, -0.287675, -0.822893, +0.135702, -16.7317, 7.52835, -6.51729, 17.316, +-0.138875, -0.591076, -0.87604, -0.0401374, -0.439988, +0.514362, 0.978875, 0.114317, 0.652455, 2.75847, +0.127838, -0.0673431, -0.058183, -0.104182, 0.073971, +-0.00959418, 0.123298, 0.100977, 0.0675852, 0.0445473, +0.166355, -0.0726645, -0.0597856, -0.0462871, 0.0931391, +-0.431765, -0.954361, -0.852153, 0.0753634, -0.436737, +-0.404625, -0.215624, -0.268892, 0.521599, -0.201161, +-1.0103, 0.115346, -0.997492, 0.0101888, 0.266479, +0.116699, 0.149483, 0.00956709, 0.0848689, 0.0285464, +-0.0281497, 0.0368108, -0.0187327, 0.133623, 0.160256, +0.132136, 0.0975494, 0.809777, -2.9119, 1.60906, +0.433096, -0.0618059, -0.0886098, 0.027982, -0.817968, +-0.08592, 0.0535723, 0.354925, 0.586248, 1.21096, +-0.723206, -0.0159389, 0.0745776, 0.0526613, 0.133663, +0.0446259, -0.0496362, 0.0188244, 0.0471644, -6.58117e-05, +0.0333697, -0.0445606, 0.0627888, 0.0244612, 0.123508, +4.18755, 1.53047, -1.88803, -4.54538, 0.20431, +0.0413455, 0.50537, 0.338508, 0.252371, -0.000301334, +-0.370387, -1.02901, -0.616503, -0.652812, 0.00404532, +0.0624655, 0.255373, 0.05062, -0.00559389, -0.0594389, +0.000658649, -0.0496338, 0.00196121, -0.00272021, -0.0455981, +0.0595863, 0.193358, 0.030662, -2.04355, 1.55208, +-0.523008, -0.15559, 0.177406, 0.230804, 0.70517, +0.000395192, -0.379844, -0.0423835, -0.195152, -0.4136, +-0.524797, 1.05256, -0.109296, -0.637306, 0.0539148, +-0.0858552, -0.606899, -0.300064, -0.0766599, -0.0802716, +0.00901309, 0.0400887, -0.456173, -0.47669, -0.00608932, +-0.219657, -4.26855, 10.958, -13.9916, -3.13647, +0.0392604, 0.214159, 0.120234, -0.321367, 0.139534, +0.0780652, 0.151579, -0.797584, 0.4504, -0.521148, 
+0.0438544, -0.445612, -1.2245, -0.378183, -0.00752445, +-0.517898, 0.490149, 0.561133, -0.314982, 0.177619, +-0.296543, 0.0727557, -0.830196, -0.328138, -0.829027, +-0.0582978, 1.13614, -12.5848, 0.0546282, -0.97928, +-0.754297, -0.223966, -0.175016, -0.0240597, 0.172425, +0.209448, 2.25374, 1.69033, -0.0182459, -0.0484306, +-0.0919532, 0.0966783, 0.365315, 0.494533, -0.727803, +0.780301, -0.580837, 0.355177, -0.1704, 0.0151144, +-0.240249, 0.197192, -7.70031, 0.771764, -0.98316, +6.65569, -0.0561571, -0.125888, -0.0773176, 0.176193, +0.711231, -0.167186, -0.0388936, -0.460926, 0.103807, /* output layer */ --7.91184, -1.52122, 0.603183, -3.27692, 3.61369, -1.16504, -1.1068, 2.80566, 0.85419, 0.545877, -0.804097, }; +-1.36902, 1.7905, 2.34488, -0.948134, -1.51596, +-2.37084, 3.30504, -4.03211, -0.918167, -0.841675, +-0.859274, }; -static const int topo[3] = {17, 10, 1}; +static const int topo[3] = {27, 10, 1}; const MLP net = { 3, diff --git a/src/opus_encoder.c b/src/opus_encoder.c index e08bcdb2..2b765184 100644 --- a/src/opus_encoder.c +++ b/src/opus_encoder.c @@ -867,8 +867,11 @@ opus_int32 opus_encode_float(OpusEncoder *st, const opus_val16 *pcm, int frame_s nb_analysis_frames = frame_size/(st->Fs/100); for (i=0;i<nb_analysis_frames;i++) tonality_analysis(&st->analysis, &analysis_info, celt_enc, pcm_buf+i*(st->Fs/100)*st->channels, st->channels); + if (st->signal_type == OPUS_AUTO) + st->voice_ratio = floor(.5+100*(1-analysis_info.music_prob)); } else { analysis_info.valid = 0; + st->voice_ratio = -1; } #endif -- cgit v1.2.1