summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Jean-Marc Valin <jmvalin@jmvalin.ca> 2013-05-25 02:14:25 -0400
committer Jean-Marc Valin <jmvalin@jmvalin.ca> 2013-05-25 02:14:25 -0400
commit e8e57a32f6e9e11998e60272f131880c95de271f (patch)
tree 3a63fc88f7d8f3edd66e8aa930a02fb41431f9d3
parent fbf99981a6a5acdb032f42d6377ca5b5dff19a20 (diff)
download opus-e8e57a32f6e9e11998e60272f131880c95de271f.tar.gz
Optimizes _celt_autocorr() by using pitch_xcorr()
Computes most of the auto-correlation by reusing pitch_xcorr(): the first n-lag products of every lag are accumulated by pitch_xcorr(), so only lag*(lag+1)/2 additional MACs are needed to complete the calculation. To make this possible, pitch_xcorr() was modified so that it no longer truncates the length to a multiple of 4. The xcorr output also did not need the floor at -1, so that clamp was removed. As a side benefit, this speeds up the PLC, which uses a higher-order LPC filter.
-rw-r--r-- celt/celt_lpc.c 7
-rw-r--r-- celt/pitch.c 66
-rw-r--r-- celt/pitch.h 7
3 files changed, 58 insertions, 22 deletions
diff --git a/celt/celt_lpc.c b/celt/celt_lpc.c
index 1838319d..8c6cfecf 100644
--- a/celt/celt_lpc.c
+++ b/celt/celt_lpc.c
@@ -32,6 +32,7 @@
#include "celt_lpc.h"
#include "stack_alloc.h"
#include "mathops.h"
+#include "pitch.h"
void _celt_lpc(
opus_val16 *_lpc, /* out: [0...p-1] LPC coefficients */
@@ -147,6 +148,7 @@ void _celt_autocorr(
{
opus_val32 d;
int i;
+ int fastN=n-lag;
VARDECL(opus_val16, xx);
SAVE_STACK;
ALLOC(xx, n, opus_val16);
@@ -177,11 +179,12 @@ void _celt_autocorr(
xx[i] = VSHR32(xx[i], shift);
}
#endif
+ pitch_xcorr(xx, xx, ac, fastN, lag+1);
while (lag>=0)
{
- for (i = lag, d = 0; i < n; i++)
+ for (i = lag+fastN, d = 0; i < n; i++)
d = MAC16_16(d, xx[i], xx[i-lag]);
- ac[lag] = d;
+ ac[lag] += d;
/*printf ("%f ", ac[lag]);*/
lag--;
}
diff --git a/celt/pitch.c b/celt/pitch.c
index 0549804b..d4a3c115 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -217,11 +217,12 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
#if 0 /* This is a simple version of the pitch correlation that should work
well on DSPs like Blackfin and TI C5x/C6x */
-static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT
- ,opus_val32 *maxval
+opus_val32
+#else
+void
#endif
- )
+pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
{
int i, j;
#ifdef FIXED_POINT
@@ -232,30 +233,29 @@ static void pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, x[j],y[i+j]);
- xcorr[i] = MAX32(-1, sum);
+ xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
#endif
}
#ifdef FIXED_POINT
- *maxval = maxcorr;
+ return maxcorr;
#endif
}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
-static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch
#ifdef FIXED_POINT
- ,opus_val32 *maxval
+opus_val32
+#else
+void
#endif
- )
+pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch)
{
int i,j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
- /* Truncate slightly if len is not a multiple of 4. */
- len -= len&3;
for (i=0;i<max_pitch-3;i+=4)
{
/* Compute correlation*/
@@ -271,7 +271,7 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
y0=*y++;
y1=*y++;
y2=*y++;
- for (j=0;j<len;j+=4)
+ for (j=0;j<len-3;j+=4)
{
opus_val16 tmp;
tmp = *x++;
@@ -299,10 +299,37 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
sum3 = MAC16_16(sum3,tmp,y1);
sum4 = MAC16_16(sum4,tmp,y2);
}
- xcorr[i]=MAX32(-1, sum1);
- xcorr[i+1]=MAX32(-1, sum2);
- xcorr[i+2]=MAX32(-1, sum3);
- xcorr[i+3]=MAX32(-1, sum4);
+ if (j++<len)
+ {
+ opus_val16 tmp = *x++;
+ y3=*y++;
+ sum1 = MAC16_16(sum1,tmp,y0);
+ sum2 = MAC16_16(sum2,tmp,y1);
+ sum3 = MAC16_16(sum3,tmp,y2);
+ sum4 = MAC16_16(sum4,tmp,y3);
+ }
+ if (j++<len)
+ {
+ opus_val16 tmp=*x++;
+ y0=*y++;
+ sum1 = MAC16_16(sum1,tmp,y1);
+ sum2 = MAC16_16(sum2,tmp,y2);
+ sum3 = MAC16_16(sum3,tmp,y3);
+ sum4 = MAC16_16(sum4,tmp,y0);
+ }
+ if (j<len)
+ {
+ opus_val16 tmp=*x++;
+ y1=*y++;
+ sum1 = MAC16_16(sum1,tmp,y2);
+ sum2 = MAC16_16(sum2,tmp,y3);
+ sum3 = MAC16_16(sum3,tmp,y0);
+ sum4 = MAC16_16(sum4,tmp,y1);
+ }
+ xcorr[i]=sum1;
+ xcorr[i+1]=sum2;
+ xcorr[i+2]=sum3;
+ xcorr[i+3]=sum4;
#ifdef FIXED_POINT
sum1 = MAX32(sum1, sum2);
sum3 = MAX32(sum3, sum4);
@@ -316,13 +343,13 @@ static void pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int l
opus_val32 sum = 0;
for (j=0;j<len;j++)
sum = MAC16_16(sum, _x[j],_y[i+j]);
- xcorr[i] = MAX32(-1, sum);
+ xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
#endif
}
#ifdef FIXED_POINT
- *maxval = maxcorr;
+ return maxcorr;
#endif
}
@@ -378,11 +405,10 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
/* Coarse search with 4x decimation */
- pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2
#ifdef FIXED_POINT
- ,&maxcorr
+ maxcorr =
#endif
- );
+ pitch_xcorr(x_lp4, y_lp4, xcorr, len>>2, max_pitch>>2);
find_best_pitch(xcorr, y_lp4, len>>2, max_pitch>>2, best_pitch
#ifdef FIXED_POINT
diff --git a/celt/pitch.h b/celt/pitch.h
index 2757071a..1d567b07 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -45,4 +45,11 @@ void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTR
opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
int N, int *T0, int prev_period, opus_val16 prev_gain);
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+pitch_xcorr(opus_val16 *_x, opus_val16 *_y, opus_val32 *xcorr, int len, int max_pitch);
+
#endif