-rw-r--r--  celt/_kiss_fft_guts.h | 16
-rw-r--r--  celt/arch.h           |  3
-rw-r--r--  celt/fixed_debug.h    |  6
-rw-r--r--  celt/fixed_generic.h  |  6
-rw-r--r--  celt/kiss_fft.c       | 56
-rw-r--r--  celt/mdct.c           | 16
6 files changed, 59 insertions, 44 deletions
diff --git a/celt/_kiss_fft_guts.h b/celt/_kiss_fft_guts.h
index 5e3d58fd..17392b3e 100644
--- a/celt/_kiss_fft_guts.h
+++ b/celt/_kiss_fft_guts.h
@@ -58,12 +58,12 @@
 #   define S_MUL(a,b) MULT16_32_Q15(b, a)

 #   define C_MUL(m,a,b) \
-      do{ (m).r = SUB32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
-          (m).i = ADD32(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)
+      do{ (m).r = SUB32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = ADD32_ovflw(S_MUL((a).r,(b).i) , S_MUL((a).i,(b).r)); }while(0)

 #   define C_MULC(m,a,b) \
-      do{ (m).r = ADD32(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
-          (m).i = SUB32(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)
+      do{ (m).r = ADD32_ovflw(S_MUL((a).r,(b).r) , S_MUL((a).i,(b).i)); \
+          (m).i = SUB32_ovflw(S_MUL((a).i,(b).r) , S_MUL((a).r,(b).i)); }while(0)

 #   define C_MULBYSCALAR( c, s ) \
       do{ (c).r = S_MUL( (c).r , s ) ;\
@@ -77,17 +77,17 @@
          DIVSCALAR( (c).i , div); }while (0)

 #define C_ADD( res, a,b)\
-    do {(res).r=ADD32((a).r,(b).r); (res).i=ADD32((a).i,(b).i); \
+    do {(res).r=ADD32_ovflw((a).r,(b).r); (res).i=ADD32_ovflw((a).i,(b).i); \
     }while(0)
 #define C_SUB( res, a,b)\
-    do {(res).r=SUB32((a).r,(b).r); (res).i=SUB32((a).i,(b).i); \
+    do {(res).r=SUB32_ovflw((a).r,(b).r); (res).i=SUB32_ovflw((a).i,(b).i); \
     }while(0)
 #define C_ADDTO( res , a)\
-    do {(res).r = ADD32((res).r, (a).r); (res).i = ADD32((res).i,(a).i);\
+    do {(res).r = ADD32_ovflw((res).r, (a).r); (res).i = ADD32_ovflw((res).i,(a).i);\
     }while(0)

 #define C_SUBFROM( res , a)\
-    do {(res).r = ADD32((res).r,(a).r); (res).i = SUB32((res).i,(a).i); \
+    do {(res).r = ADD32_ovflw((res).r,(a).r); (res).i = SUB32_ovflw((res).i,(a).i); \
     }while(0)

 #if defined(OPUS_ARM_INLINE_ASM)
diff --git a/celt/arch.h b/celt/arch.h
index 8ceab5fe..05e434b9 100644
--- a/celt/arch.h
+++ b/celt/arch.h
@@ -186,6 +186,7 @@ static OPUS_INLINE int celt_isnan(float x)

 #define NEG16(x) (-(x))
 #define NEG32(x) (-(x))
+#define NEG32_ovflw(x) (-(x))
 #define EXTRACT16(x) (x)
 #define EXTEND32(x) (x)
 #define SHR16(a,shift) (a)
@@ -209,6 +210,8 @@ static OPUS_INLINE int celt_isnan(float x)
 #define SUB16(a,b) ((a)-(b))
 #define ADD32(a,b) ((a)+(b))
 #define SUB32(a,b) ((a)-(b))
+#define ADD32_ovflw(a,b) ((a)+(b))
+#define SUB32_ovflw(a,b) ((a)-(b))
 #define MULT16_16_16(a,b) ((a)*(b))
 #define MULT16_16(a,b) ((opus_val32)(a)*(opus_val32)(b))
 #define MAC16_16(c,a,b) ((c)+(opus_val32)(a)*(opus_val32)(b))
diff --git a/celt/fixed_debug.h b/celt/fixed_debug.h
index d28227f5..f55dbf92 100644
--- a/celt/fixed_debug.h
+++ b/celt/fixed_debug.h
@@ -59,6 +59,12 @@ extern opus_int64 celt_mips;
 #define SHR(a,b) SHR32(a,b)
 #define PSHR(a,b) PSHR32(a,b)

+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) (celt_mips+=2,(opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+#define NEG32_ovflw(a) (celt_mips+=2,(opus_val32)(-(opus_uint32)(a)))
+
 static OPUS_INLINE short NEG16(int x)
 {
    int res;
diff --git a/celt/fixed_generic.h b/celt/fixed_generic.h
index 1cfd6d69..32e38ff0 100644
--- a/celt/fixed_generic.h
+++ b/celt/fixed_generic.h
@@ -117,6 +117,12 @@
 /** Subtract two 32-bit values */
 #define SUB32(a,b) ((opus_val32)(a)-(opus_val32)(b))

+/** Add two 32-bit values, ignore any overflows */
+#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
+/** Subtract two 32-bit values, ignore any overflows */
+#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
+#define NEG32_ovflw(a) ((opus_val32)(-(opus_uint32)(a)))
+
 /** 16x16 multiplication where the result fits in 16 bits */
 #define MULT16_16_16(a,b) ((((opus_val16)(a))*((opus_val16)(b))))

diff --git a/celt/kiss_fft.c b/celt/kiss_fft.c
index 1f8fd053..83775165 100644
--- a/celt/kiss_fft.c
+++ b/celt/kiss_fft.c
@@ -82,8 +82,8 @@ static void kf_bfly2(
          C_SUB( Fout2[0] , Fout[0] , t );
          C_ADDTO( Fout[0] , t );

-         t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
-         t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
+         t.r = S_MUL(ADD32_ovflw(Fout2[1].r, Fout2[1].i), tw);
+         t.i = S_MUL(SUB32_ovflw(Fout2[1].i, Fout2[1].r), tw);
          C_SUB( Fout2[1] , Fout[1] , t );
          C_ADDTO( Fout[1] , t );

@@ -92,8 +92,8 @@ static void kf_bfly2(
          C_SUB( Fout2[2] , Fout[2] , t );
          C_ADDTO( Fout[2] , t );

-         t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
-         t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
+         t.r = S_MUL(SUB32_ovflw(Fout2[3].i, Fout2[3].r), tw);
+         t.i = S_MUL(NEG32_ovflw(ADD32_ovflw(Fout2[3].i, Fout2[3].r)), tw);
          C_SUB( Fout2[3] , Fout[3] , t );
          C_ADDTO( Fout[3] , t );
          Fout += 8;
@@ -126,10 +126,10 @@ static void kf_bfly4(
         C_ADDTO( *Fout , scratch1 );
         C_SUB( scratch1 , Fout[1] , Fout[3] );

-        Fout[1].r = scratch0.r + scratch1.i;
-        Fout[1].i = scratch0.i - scratch1.r;
-        Fout[3].r = scratch0.r - scratch1.i;
-        Fout[3].i = scratch0.i + scratch1.r;
+        Fout[1].r = ADD32_ovflw(scratch0.r, scratch1.i);
+        Fout[1].i = SUB32_ovflw(scratch0.i, scratch1.r);
+        Fout[3].r = SUB32_ovflw(scratch0.r, scratch1.i);
+        Fout[3].i = ADD32_ovflw(scratch0.i, scratch1.r);
         Fout+=4;
      }
   } else {
@@ -160,10 +160,10 @@ static void kf_bfly4(
            tw3 += fstride*3;
            C_ADDTO( *Fout , scratch[3] );

-           Fout[m].r = scratch[5].r + scratch[4].i;
-           Fout[m].i = scratch[5].i - scratch[4].r;
-           Fout[m3].r = scratch[5].r - scratch[4].i;
-           Fout[m3].i = scratch[5].i + scratch[4].r;
+           Fout[m].r = ADD32_ovflw(scratch[5].r, scratch[4].i);
+           Fout[m].i = SUB32_ovflw(scratch[5].i, scratch[4].r);
+           Fout[m3].r = SUB32_ovflw(scratch[5].r, scratch[4].i);
+           Fout[m3].i = ADD32_ovflw(scratch[5].i, scratch[4].r);
            ++Fout;
         }
      }
@@ -212,18 +212,18 @@ static void kf_bfly3(
         tw1 += fstride;
         tw2 += fstride*2;

-        Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
-        Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
+        Fout[m].r = SUB32_ovflw(Fout->r, HALF_OF(scratch[3].r));
+        Fout[m].i = SUB32_ovflw(Fout->i, HALF_OF(scratch[3].i));

         C_MULBYSCALAR( scratch[0] , epi3.i );

         C_ADDTO(*Fout,scratch[3]);

-        Fout[m2].r = Fout[m].r + scratch[0].i;
-        Fout[m2].i = Fout[m].i - scratch[0].r;
+        Fout[m2].r = ADD32_ovflw(Fout[m].r, scratch[0].i);
+        Fout[m2].i = SUB32_ovflw(Fout[m].i, scratch[0].r);

-        Fout[m].r -= scratch[0].i;
-        Fout[m].i += scratch[0].r;
+        Fout[m].r = SUB32_ovflw(Fout[m].r, scratch[0].i);
+        Fout[m].i = ADD32_ovflw(Fout[m].i, scratch[0].r);

         ++Fout;
      } while(--k);
@@ -282,22 +282,22 @@ static void kf_bfly5(
         C_ADD( scratch[8],scratch[2],scratch[3]);
         C_SUB( scratch[9],scratch[2],scratch[3]);

-        Fout0->r += scratch[7].r + scratch[8].r;
-        Fout0->i += scratch[7].i + scratch[8].i;
+        Fout0->r = ADD32_ovflw(Fout0->r, ADD32_ovflw(scratch[7].r, scratch[8].r));
+        Fout0->i = ADD32_ovflw(Fout0->i, ADD32_ovflw(scratch[7].i, scratch[8].i));

-        scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
-        scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
+        scratch[5].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,ya.r), S_MUL(scratch[8].r,yb.r)));
+        scratch[5].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,ya.r), S_MUL(scratch[8].i,yb.r)));

-        scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
-        scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
+        scratch[6].r = ADD32_ovflw(S_MUL(scratch[10].i,ya.i), S_MUL(scratch[9].i,yb.i));
+        scratch[6].i = NEG32_ovflw(ADD32_ovflw(S_MUL(scratch[10].r,ya.i), S_MUL(scratch[9].r,yb.i)));

         C_SUB(*Fout1,scratch[5],scratch[6]);
         C_ADD(*Fout4,scratch[5],scratch[6]);

-        scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
-        scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
-        scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
-        scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
+        scratch[11].r = ADD32_ovflw(scratch[0].r, ADD32_ovflw(S_MUL(scratch[7].r,yb.r), S_MUL(scratch[8].r,ya.r)));
+        scratch[11].i = ADD32_ovflw(scratch[0].i, ADD32_ovflw(S_MUL(scratch[7].i,yb.r), S_MUL(scratch[8].i,ya.r)));
+        scratch[12].r = SUB32_ovflw(S_MUL(scratch[9].i,ya.i), S_MUL(scratch[10].i,yb.i));
+        scratch[12].i = SUB32_ovflw(S_MUL(scratch[10].r,yb.i), S_MUL(scratch[9].r,ya.i));

         C_ADD(*Fout2,scratch[11],scratch[12]);
         C_SUB(*Fout3,scratch[11],scratch[12]);
diff --git a/celt/mdct.c b/celt/mdct.c
index 5315ad11..5c6dab5b 100644
--- a/celt/mdct.c
+++ b/celt/mdct.c
@@ -270,8 +270,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
          int rev;
          kiss_fft_scalar yr, yi;
          rev = *bitrev++;
-         yr = S_MUL(*xp2, t[i]) + S_MUL(*xp1, t[N4+i]);
-         yi = S_MUL(*xp1, t[i]) - S_MUL(*xp2, t[N4+i]);
+         yr = ADD32_ovflw(S_MUL(*xp2, t[i]), S_MUL(*xp1, t[N4+i]));
+         yi = SUB32_ovflw(S_MUL(*xp1, t[i]), S_MUL(*xp2, t[N4+i]));
          /* We swap real and imag because we use an FFT instead of an IFFT. */
          yp[2*rev+1] = yr;
          yp[2*rev] = yi;
@@ -301,8 +301,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
         t0 = t[i];
         t1 = t[N4+i];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-        yr = S_MUL(re,t0) + S_MUL(im,t1);
-        yi = S_MUL(re,t1) - S_MUL(im,t0);
+        yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
+        yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
         /* We swap real and imag because we're using an FFT instead of an IFFT. */
         re = yp1[1];
         im = yp1[0];
@@ -312,8 +312,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
         t0 = t[(N4-i-1)];
         t1 = t[(N2-i-1)];
         /* We'd scale up by 2 here, but instead it's done when mixing the windows */
-        yr = S_MUL(re,t0) + S_MUL(im,t1);
-        yi = S_MUL(re,t1) - S_MUL(im,t0);
+        yr = ADD32_ovflw(S_MUL(re,t0), S_MUL(im,t1));
+        yi = SUB32_ovflw(S_MUL(re,t1), S_MUL(im,t0));
         yp1[0] = yr;
         yp0[1] = yi;
         yp0 += 2;
@@ -333,8 +333,8 @@ void clt_mdct_backward_c(const mdct_lookup *l, kiss_fft_scalar *in, kiss_fft_sca
         kiss_fft_scalar x1, x2;
         x1 = *xp1;
         x2 = *yp1;
-        *yp1++ = MULT16_32_Q15(*wp2, x2) - MULT16_32_Q15(*wp1, x1);
-        *xp1-- = MULT16_32_Q15(*wp1, x2) + MULT16_32_Q15(*wp2, x1);
+        *yp1++ = SUB32_ovflw(MULT16_32_Q15(*wp2, x2), MULT16_32_Q15(*wp1, x1));
+        *xp1-- = ADD32_ovflw(MULT16_32_Q15(*wp1, x2), MULT16_32_Q15(*wp2, x1));
         wp1++;
         wp2--;
      }
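
A minimal standalone sketch (not part of the commit) of why the *_ovflw macros are safe where the plain ADD32/SUB32 were not: signed 32-bit overflow is undefined behaviour in C, while unsigned arithmetic is defined to wrap modulo 2^32, so casting to opus_uint32 before the operation and back to opus_val32 afterwards gives well-defined two's-complement wraparound. The macro bodies below are copied from the fixed_generic.h hunk; the typedefs and test values are illustrative stand-ins, not the ones opus_types.h provides.

#include <stdio.h>
#include <stdint.h>

/* Stand-in typedefs for this sketch; a fixed-point Opus build gets these
   from opus_types.h / arch.h. */
typedef int32_t  opus_val32;
typedef uint32_t opus_uint32;

/* Same definitions as in the fixed_generic.h hunk above. */
#define ADD32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)+(opus_uint32)(b)))
#define SUB32_ovflw(a,b) ((opus_val32)((opus_uint32)(a)-(opus_uint32)(b)))
#define NEG32_ovflw(a)   ((opus_val32)(-(opus_uint32)(a)))

int main(void)
{
   opus_val32 a = 0x7fffff00;  /* close to INT32_MAX */
   opus_val32 b = 0x00000200;

   /* a + b does not fit in a signed 32-bit int, so the old ADD32 form would
      be undefined behaviour; the _ovflw variants wrap around instead. */
   printf("ADD32_ovflw: %ld\n", (long)ADD32_ovflw(a, b));        /* wraps negative */
   printf("SUB32_ovflw: %ld\n", (long)SUB32_ovflw(-a, b));       /* wraps positive */
   printf("NEG32_ovflw: %ld\n", (long)NEG32_ovflw(INT32_MIN));   /* stays INT32_MIN */
   return 0;
}

The conversion from unsigned back to opus_val32 is implementation-defined rather than undefined, and on the two's-complement targets Opus supports it yields exactly the wraparound the FFT and MDCT butterflies rely on here.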