diff options
author | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-05-25 02:14:25 -0400 |
---|---|---|
committer | Jean-Marc Valin <jmvalin@jmvalin.ca> | 2013-05-25 02:14:25 -0400 |
commit | e8e57a32f6e9e11998e60272f131880c95de271f (patch) | |
tree | 3a63fc88f7d8f3edd66e8aa930a02fb41431f9d3 | |
parent | fbf99981a6a5acdb032f42d6377ca5b5dff19a20 (diff) | |
download | opus-e8e57a32f6e9e11998e60272f131880c95de271f.tar.gz |
Optimizes _celt_autocorr() by using pitch_xcorr()
Computes most of the auto-correlation by reusing pitch_xcorr(). We only
need lag*(lag+1)/2 additional MACs to complete the calculations.
To do this, pitch_xcorr() was modified so that it no longer truncates the
length to a multiple of 4. Also, the floor at -1 on the xcorr output was
removed, since it was not needed.
As a side benefit, this speeds up the PLC, which uses a higher order LPC
filter.
-rw-r--r-- | celt/celt_lpc.c | 7 | ||||
-rw-r--r-- | celt/pitch.c | 66 | ||||
-rw-r--r-- | celt/pitch.h | 7 |
3 files changed, 58 insertions, 22 deletions
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c index 1838319d..8c6cfecf 100644 --- a/celt/celt_lpc.c +++ b/celt/celt_lpc.c @@ -32,6 +32,7 @@ #include "celt_lpc.h" #include "stack_alloc.h" #include "mathops.h" +#include "pitch.h" void _celt_lpc( opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */ @@ -147,6 +148,7 @@ void _celt_autocorr( { opus_val32 d; int i; + int fastN=n-lag; VARDECL(opus_val16, xx); SAVE_STACK; ALLOC(xx, n, opus_val16); @@ -177,11 +179,12 @@ void _celt_autocorr( xx[i] = VSHR32(xx[i], shift); } #endif + pitch_xcorr(xx, xx, ac, fastN, lag+1); while (lag>=0) { - for (i = lag, d = 0; i < n; i++) + for (i = lag+fastN, d = 0; i < n; i++) d = MAC16_16(d, xx[i], xx[i-lag]); - ac[lag] = d; + ac[lag] += d; /*printf ("%f ", ac[lag]);*/ lag--; } diff --git a/celt/pitch.c b/celt/pitch.c index 0549804b..d4a3c115 100644 --- a/celt/pitch.c +++ b/celt/pitch.c @@ -217,11 +217,12 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x #if 0 /* This is a simple version of the pitch correlation that should work well on DSPs like Blackfin and TI C5x/C6x */ -static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch #ifdef FIXED_POINT - ,opus_val32 *maxval +opus_val32 +#else +void #endif - ) +pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch) { int i, j; #ifdef FIXED_POINT @@ -232,30 +233,29 @@ static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len opus_val32 sum = 0; for (j=0;j<len;j++) sum = MAC16_16(sum, x[j],y[i+j]); - xcorr[i] = MAX32(-1, sum); + xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); #endif } #ifdef FIXED_POINT - *maxval = maxcorr; + return maxcorr; #endif } #else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */ -static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch #ifdef FIXED_POINT - ,opus_val32 *maxval +opus_val32 +#else +void #endif 
- ) +pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch) { int i,j; #ifdef FIXED_POINT opus_val32 maxcorr=1; #endif - /* Truncate slightly if len is not a multiple of 4. */ - len -= len&3; for (i=0;i<max_pitch-3;i+=4) { /* Compute correlation*/ @@ -271,7 +271,7 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l y0=*y++; y1=*y++; y2=*y++; - for (j=0;j<len;j+=4) + for (j=0;j<len-3;j+=4) { opus_val16 tmp; tmp = *x++; @@ -299,10 +299,37 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l sum3 = MAC16_16(sum3,tmp,y1); sum4 = MAC16_16(sum4,tmp,y2); } - xcorr[i]=MAX32(-1, sum1); - xcorr[i+1]=MAX32(-1, sum2); - xcorr[i+2]=MAX32(-1, sum3); - xcorr[i+3]=MAX32(-1, sum4); + if (j++<len) + { + opus_val16 tmp = *x++; + y3=*y++; + sum1 = MAC16_16(sum1,tmp,y0); + sum2 = MAC16_16(sum2,tmp,y1); + sum3 = MAC16_16(sum3,tmp,y2); + sum4 = MAC16_16(sum4,tmp,y3); + } + if (j++<len) + { + opus_val16 tmp=*x++; + y0=*y++; + sum1 = MAC16_16(sum1,tmp,y1); + sum2 = MAC16_16(sum2,tmp,y2); + sum3 = MAC16_16(sum3,tmp,y3); + sum4 = MAC16_16(sum4,tmp,y0); + } + if (j<len) + { + opus_val16 tmp=*x++; + y1=*y++; + sum1 = MAC16_16(sum1,tmp,y2); + sum2 = MAC16_16(sum2,tmp,y3); + sum3 = MAC16_16(sum3,tmp,y0); + sum4 = MAC16_16(sum4,tmp,y1); + } + xcorr[i]=sum1; + xcorr[i+1]=sum2; + xcorr[i+2]=sum3; + xcorr[i+3]=sum4; #ifdef FIXED_POINT sum1 = MAX32(sum1, sum2); sum3 = MAX32(sum3, sum4); @@ -316,13 +343,13 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l opus_val32 sum = 0; for (j=0;j<len;j++) sum = MAC16_16(sum, _x[j],_y[i+j]); - xcorr[i] = MAX32(-1, sum); + xcorr[i] = sum; #ifdef FIXED_POINT maxcorr = MAX32(maxcorr, sum); #endif } #ifdef FIXED_POINT - *maxval = maxcorr; + return maxcorr; #endif } @@ -378,11 +405,10 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR /* Coarse search with 4x decimation */ - pitch_xcorr(x_lp4, y_lp4, xcorr, 
len>>2, max_pitch>>2 #ifdef FIXED_POINT - ,&maxcorr + maxcorr = #endif - ); + pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2); find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch #ifdef FIXED_POINT diff --git a/celt/pitch.h b/celt/pitch.h index 2757071a..1d567b07 100644 --- a/celt/pitch.h +++ b/celt/pitch.h @@ -45,4 +45,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod, int N, int *T0, int prev_period, opus_val16 prev_gain); +#ifdef FIXED_POINT +opus_val32 +#else +void +#endif +pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch); + #endif |