summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Timothy B. Terriberry <tterribe@xiph.org> 2014-12-01 10:47:25 -0800
committer Timothy B. Terriberry <tterribe@xiph.org> 2014-12-01 14:25:52 -0800
commit aad281878d650f680a895730eaa386df7cb3e8f0 (patch)
tree d73b63c89312ea2615340099ca95eb77338fa9bf
parent 25b27a9c167302769db512a9e32c66323bc7904c (diff)
download opus-aad281878d650f680a895730eaa386df7cb3e8f0.tar.gz
Fix celt_pitch_xcorr_c signature.
This should not take an arch parameter, so it can properly be used as a fallback for accelerated versions which do not. This patch instead provides a separate version which can call accelerated helpers for platforms that have taken that approach.
-rw-r--r-- celt/mips/pitch_mipsr1.h 8
-rw-r--r-- celt/pitch.c 40
-rw-r--r-- celt/pitch.h 19
-rw-r--r-- celt/x86/pitch_sse.h 5
4 files changed, 45 insertions, 27 deletions
diff --git a/celt/mips/pitch_mipsr1.h b/celt/mips/pitch_mipsr1.h
index 9c5dc34d..e2f017e9 100644
--- a/celt/mips/pitch_mipsr1.h
+++ b/celt/mips/pitch_mipsr1.h
@@ -58,8 +58,8 @@ static inline void dual_inner_prod(const opus_val16 *x, const opus_val16 *y01, c
*xy2 = xy02;
}
-#define OVERRIDE_XCORR_KERNEL
-static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
+static inline void xcorr_kernel_mips(const opus_val16 * x,
+ const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
opus_val16 y_0, y_1, y_2, y_3;
@@ -151,4 +151,8 @@ static inline void xcorr_kernel(const opus_val16 * x, const opus_val16 * y, opus
sum[3] = (opus_val32)sum_3;
}
+#define OVERRIDE_XCORR_KERNEL
+#define xcorr_kernel(x, y, sum, len, arch) \
+ ((void)(arch), xcorr_kernel_mips(x, y, sum, len))
+
#endif /* PITCH_MIPSR1_H */
diff --git a/celt/pitch.c b/celt/pitch.c
index 154c8484..43647030 100644
--- a/celt/pitch.c
+++ b/celt/pitch.c
@@ -214,25 +214,35 @@ void pitch_downsample(celt_sig * OPUS_RESTRICT x[], opus_val16 * OPUS_RESTRICT x
celt_fir5(x_lp, lpc2, x_lp, len>>1, mem);
}
-#if 0 /* This is a simple version of the pitch correlation that should work
- well on DSPs like Blackfin and TI C5x/C6x */
-
+/* Pure C implementation. */
#ifdef FIXED_POINT
opus_val32
#else
void
#endif
-celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int max_pitch)
+#if defined(OVERRIDE_PITCH_XCORR)
+celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch)
+#else
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch, int arch)
+#endif
{
+
+#if 0 /* This is a simple version of the pitch correlation that should work
+ well on DSPs like Blackfin and TI C5x/C6x */
int i, j;
#ifdef FIXED_POINT
opus_val32 maxcorr=1;
#endif
+#if !defined(OVERRIDE_PITCH_XCORR)
+ (void)arch;
+#endif
for (i=0;i<max_pitch;i++)
{
opus_val32 sum = 0;
for (j=0;j<len;j++)
- sum = MAC16_16(sum, x[j],y[i+j]);
+ sum = MAC16_16(sum, _x[j], _y[i+j]);
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@@ -241,18 +251,8 @@ celt_pitch_xcorr(opus_val16 *x, opus_val16 *y, opus_val32 *xcorr, int len, int m
#ifdef FIXED_POINT
return maxcorr;
#endif
-}
#else /* Unrolled version of the pitch correlation -- runs faster on x86 and ARM */
-
-#ifdef FIXED_POINT
-opus_val32
-#else
-void
-#endif
-celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
- opus_val32 *xcorr, int len, int max_pitch, int arch)
-{
int i;
/*The EDSP version requires that max_pitch is at least 1, and that _x is
32-bit aligned.
@@ -265,7 +265,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (i=0;i<max_pitch-3;i+=4)
{
opus_val32 sum[4]={0,0,0,0};
+#if defined(OVERRIDE_PITCH_XCORR)
+ xcorr_kernel_c(_x, _y+i, sum, len);
+#else
xcorr_kernel(_x, _y+i, sum, len, arch);
+#endif
xcorr[i]=sum[0];
xcorr[i+1]=sum[1];
xcorr[i+2]=sum[2];
@@ -281,7 +285,11 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
for (;i<max_pitch;i++)
{
opus_val32 sum;
+#if defined(OVERRIDE_PITCH_XCORR)
+ sum = celt_inner_prod_c(_x, _y+i, len);
+#else
sum = celt_inner_prod(_x, _y+i, len, arch);
+#endif
xcorr[i] = sum;
#ifdef FIXED_POINT
maxcorr = MAX32(maxcorr, sum);
@@ -290,9 +298,9 @@ celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
#ifdef FIXED_POINT
return maxcorr;
#endif
+#endif
}
-#endif
void pitch_search(const opus_val16 * OPUS_RESTRICT x_lp, opus_val16 * OPUS_RESTRICT y,
int len, int max_pitch, int *pitch, int arch)
{
diff --git a/celt/pitch.h b/celt/pitch.h
index 027ebd9b..5c6e551a 100644
--- a/celt/pitch.h
+++ b/celt/pitch.h
@@ -62,7 +62,6 @@ opus_val16 remove_doubling(opus_val16 *x, int maxperiod, int minperiod,
/* OPT: This is the kernel you really want to optimize. It gets used a lot
by the prefilter and by the PLC. */
-#ifndef OVERRIDE_XCORR_KERNEL
static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 * y, opus_val32 sum[4], int len)
{
int j;
@@ -129,11 +128,9 @@ static OPUS_INLINE void xcorr_kernel_c(const opus_val16 * x, const opus_val16 *
}
}
-#if !defined(OPUS_X86_MAY_HAVE_SSE4_1)
+#ifndef OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((void)(arch),xcorr_kernel_c(x, y, sum, len))
-#endif
-
#endif /* OVERRIDE_XCORR_KERNEL */
@@ -177,7 +174,7 @@ opus_val32
void
#endif
celt_pitch_xcorr_c(const opus_val16 *_x, const opus_val16 *_y,
- opus_val32 *xcorr, int len, int max_pitch, int arch);
+ opus_val32 *xcorr, int len, int max_pitch);
#if !defined(OVERRIDE_PITCH_XCORR)
/*Is run-time CPU detection enabled on this platform?*/
@@ -191,12 +188,20 @@ void
(*const CELT_PITCH_XCORR_IMPL[OPUS_ARCHMASK+1])(const opus_val16 *,
const opus_val16 *, opus_val32 *, int, int);
+# define OVERRIDE_PITCH_XCORR
# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
((*CELT_PITCH_XCORR_IMPL[(arch)&OPUS_ARCHMASK])(_x, _y, \
xcorr, len, max_pitch))
# else
-# define celt_pitch_xcorr(_x, _y, xcorr, len, max_pitch, arch) \
- ((void)(arch),celt_pitch_xcorr_c(_x, _y, xcorr, len, max_pitch, arch))
+
+#ifdef FIXED_POINT
+opus_val32
+#else
+void
+#endif
+celt_pitch_xcorr(const opus_val16 *_x, const opus_val16 *_y,
+ opus_val32 *xcorr, int len, int max_pitch, int arch);
+
# endif
#endif
diff --git a/celt/x86/pitch_sse.h b/celt/x86/pitch_sse.h
index 837e8ae2..99d1919a 100644
--- a/celt/x86/pitch_sse.h
+++ b/celt/x86/pitch_sse.h
@@ -43,14 +43,15 @@ void xcorr_kernel_sse4_1(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
- int len );
+ int len);
extern void (*const XCORR_KERNEL_IMPL[OPUS_ARCHMASK + 1])(
const opus_int16 *x,
const opus_int16 *y,
opus_val32 sum[4],
- int len );
+ int len);
+#define OVERRIDE_XCORR_KERNEL
#define xcorr_kernel(x, y, sum, len, arch) \
((*XCORR_KERNEL_IMPL[(arch) & OPUS_ARCHMASK])(x, y, sum, len))