diff options
author | Monty <xiphmont@xiph.org> | 2003-04-21 22:12:52 +0000 |
---|---|---|
committer | Monty <xiphmont@xiph.org> | 2003-04-21 22:12:52 +0000 |
commit | 96389c3f1dde0c46d72accaaa9ac6fbc12867821 (patch) | |
tree | f60ece797aecf240c24a84977a4dda54e850efb7 | |
parent | 5a6c26ad884f852dde1c17bb88a83e7e51f07fb2 (diff) | |
download | tremor-96389c3f1dde0c46d72accaaa9ac6fbc12867821.tar.gz |
mdct work continued; no working space, reduced extraneous load/store
steps. pcmout must now be more tightly integrated with vorbisfile;
last stage of mdct renders directly into 16 bit PCM output.
git-svn-id: https://svn.xiph.org/branches/lowmem-branch/Tremor@4621 0101bb08-14d6-0310-b084-bc0e0c8e3800
-rw-r--r-- | mdct.c | 453 | ||||
-rw-r--r-- | mdct.h | 13 |
2 files changed, 250 insertions, 216 deletions
@@ -6,14 +6,14 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2003 * * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * * * ******************************************************************** function: normalized modified discrete cosine transform power of two length transform only [64 <= n ] - last mod: $Id: mdct.c,v 1.9.6.1 2003/04/19 06:31:30 xiphmont Exp $ + last mod: $Id: mdct.c,v 1.9.6.2 2003/04/21 22:12:52 xiphmont Exp $ Original algorithm adapted long ago from _The use of multirate filter banks for coding of high quality digital audio_, by T. Sporer, @@ -38,6 +38,46 @@ #include "mdct.h" #include "mdct_lookup.h" +STIN void presymmetry(DATA_TYPE *in,int n2,int step){ + DATA_TYPE *aX; + DATA_TYPE *bX; + LOOKUP_T *T; + int n4=n2>>1; + + aX = in+n2-3; + T = sincos_lookup0; + + do{ + REG_TYPE r0= aX[0]; + REG_TYPE r2= aX[2]; + XPROD31( r0, r2, T[0], T[1], &aX[0], &aX[2] ); T+=step; + aX-=4; + }while(aX>=in+n4); + do{ + REG_TYPE r0= aX[0]; + REG_TYPE r2= aX[2]; + XPROD31( r0, r2, T[1], T[0], &aX[0], &aX[2] ); T-=step; + aX-=4; + }while(aX>=in); + + aX = in+n2-4; + bX = in; + T = sincos_lookup0; + do{ + REG_TYPE ri0= aX[0]; + REG_TYPE ri2= aX[2]; + REG_TYPE ro0= bX[0]; + REG_TYPE ro2= bX[2]; + + XNPROD31( ro2, ro0, T[1], T[0], &aX[0], &aX[2] ); T+=step; + XNPROD31( ri2, ri0, T[0], T[1], &bX[0], &bX[2] ); + + aX-=4; + bX+=4; + }while(aX>=in+n4); + +} + /* 8 point butterfly (in place) */ STIN void mdct_butterfly_8(DATA_TYPE *x){ @@ -181,98 +221,165 @@ STIN int bitrev12(int x){ return bitrev[x>>8]|(bitrev[(x&0x0f0)>>4]<<4)|(((int)bitrev[x&0x00f])<<8); } -STIN void mdct_bitreverse(DATA_TYPE *x,int n,int step,int shift){ - +STIN void mdct_bitreverse(DATA_TYPE *x,int n,int shift){ int bit = 0; + DATA_TYPE *w = x+(n>>1); + + do{ + DATA_TYPE b = bitrev12(bit++); + DATA_TYPE *xx = x + (b>>shift); + REG_TYPE r; + + w -= 2; + + if(w>xx){ + + r = xx[0]; + xx[0] = w[0]; + w[0] = r; + + r = xx[1]; + xx[1] = w[1]; + w[1] = r; + } + }while(w>x); +} + +STIN void mdct_step7(DATA_TYPE *x,int n,int step){ DATA_TYPE *w0 = x; - DATA_TYPE *w1 = x = w0+(n>>1); + DATA_TYPE *w1 = x+(n>>1); LOOKUP_T *T = (step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; LOOKUP_T *Ttop = T+1024; - DATA_TYPE r2; - + REG_TYPE r0, r1, r2, r3; + do{ - DATA_TYPE r3 = bitrev12(bit++); - DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; - DATA_TYPE *x1 = x + (r3>>shift); - - REG_TYPE r0 = x0[0] + x1[0]; - REG_TYPE r1 = x1[1] - x0[1]; + w1 -= 2; - XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; + r0 = w0[0] + w1[0]; + r1 = w1[1] - w0[1]; + r2 = MULT32(r0, T[1]) + MULT32(r1, T[0]); + r3 = MULT32(r1, T[1]) - MULT32(r0, T[0]); + T+=step; - w1 -= 4; - - r0 = (x0[1] + x1[1])>>1; - r1 = (x0[0] - x1[0])>>1; + r0 = (w0[1] + w1[1])>>1; + r1 = (w0[0] - w1[0])>>1; w0[0] = r0 + r2; w0[1] = r1 + r3; - w1[2] = r0 - r2; - w1[3] = r3 - r1; - - r3 = bitrev12(bit++); - x0 = x + ((r3 ^ 0xfff)>>shift) -1; - x1 = x + (r3>>shift); - - r0 = x0[0] + x1[0]; - r1 = x1[1] - x0[1]; - - XPROD32( r0, r1, T[1], T[0], &r2, &r3 ); T+=step; - - r0 = (x0[1] + x1[1])>>1; - r1 = (x0[0] - x1[0])>>1; - w0[2] = r0 + r2; - w0[3] = r1 + r3; w1[0] = r0 - r2; w1[1] = r3 - r1; - w0 += 4; + w0 += 2; }while(T<Ttop); do{ - DATA_TYPE r3 = bitrev12(bit++); - DATA_TYPE *x0 = x + ((r3 ^ 0xfff)>>shift) -1; - DATA_TYPE *x1 = x + (r3>>shift); - - REG_TYPE r0 = x0[0] + x1[0]; - REG_TYPE r1 = x1[1] - x0[1]; + w1 -= 2; - T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); + r0 = w0[0] + w1[0]; + r1 = w1[1] - w0[1]; + T-=step; + r2 = MULT32(r0, T[0]) + MULT32(r1, T[1]); + r3 = MULT32(r1, T[0]) - MULT32(r0, T[1]); - w1 -= 4; - - r0 = (x0[1] + x1[1])>>1; - r1 = (x0[0] - x1[0])>>1; + r0 = (w0[1] + w1[1])>>1; + r1 = (w0[0] - w1[0])>>1; w0[0] = r0 + r2; w0[1] = r1 + r3; - w1[2] = r0 - r2; - w1[3] = r3 - r1; - - r3 = bitrev12(bit++); - x0 = x + ((r3 ^ 0xfff)>>shift) -1; - x1 = x + (r3>>shift); - - r0 = x0[0] + x1[0]; - r1 = x1[1] - x0[1]; - - T-=step; XPROD32( r0, r1, T[0], T[1], &r2, &r3 ); - - r0 = (x0[1] + x1[1])>>1; - r1 = (x0[0] - x1[0])>>1; - w0[2] = r0 + r2; - w0[3] = r1 + r3; w1[0] = r0 - r2; w1[1] = r3 - r1; - w0 += 4; + w0 += 2; }while(w0<w1); } -void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ - int n2=n>>1; - int n4=n>>2; - DATA_TYPE *iX; - DATA_TYPE *oX; +STIN void mdct_step8(DATA_TYPE *x, int n, int step){ LOOKUP_T *T; LOOKUP_T *V; + DATA_TYPE *iX =x+(n>>1); + step>>=2; + + switch(step) { + default: + T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; + do{ + REG_TYPE r0 = x[0]; + REG_TYPE r1 = -x[1]; + XPROD31( r0, r1, T[0], T[1], x, x+1); T+=step; + x +=2; + }while(x<iX); + break; + + case 1: + { + /* linear interpolation between table values: offset=0.5, step=1 */ + REG_TYPE t0,t1,v0,v1,r0,r1; + T = sincos_lookup0; + V = sincos_lookup1; + t0 = (*T++)>>1; + t1 = (*T++)>>1; + do{ + r0 = x[0]; + r1 = -x[1]; + t0 += (v0 = (*V++)>>1); + t1 += (v1 = (*V++)>>1); + XPROD31( r0, r1, t0, t1, x, x+1 ); + + r0 = x[2]; + r1 = -x[3]; + v0 += (t0 = (*T++)>>1); + v1 += (t1 = (*T++)>>1); + XPROD31( r0, r1, v0, v1, x+2, x+3 ); + + x += 4; + }while(x<iX); + break; + } + + case 0: + { + /* linear interpolation between table values: offset=0.25, step=0.5 */ + REG_TYPE t0,t1,v0,v1,q0,q1,r0,r1; + T = sincos_lookup0; + V = sincos_lookup1; + t0 = *T++; + t1 = *T++; + do{ + + + v0 = *V++; + v1 = *V++; + t0 += (q0 = (v0-t0)>>2); + t1 += (q1 = (v1-t1)>>2); + r0 = x[0]; + r1 = -x[1]; + XPROD31( r0, r1, t0, t1, x, x+1 ); + t0 = v0-q0; + t1 = v1-q1; + r0 = x[2]; + r1 = -x[3]; + XPROD31( r0, r1, t0, t1, x+2, x+3 ); + + t0 = *T++; + t1 = *T++; + v0 += (q0 = (t0-v0)>>2); + v1 += (q1 = (t1-v1)>>2); + r0 = x[4]; + r1 = -x[5]; + XPROD31( r0, r1, v0, v1, x+4, x+5 ); + v0 = t0-q0; + v1 = t1-q1; + r0 = x[6]; + r1 = -x[7]; + XPROD31( r0, r1, v0, v1, x+5, x+6 ); + + x+=8; + }while(x<iX); + break; + } + } +} + +/* partial; doesn't perform last-step deinterleave/unrolling. That + can be done more efficiently during pcm output */ +void mdct_backward(int n, DATA_TYPE *in){ int shift; int step; @@ -280,157 +387,77 @@ void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out){ shift=13-shift; step=2<<shift; - /* rotate */ + presymmetry(in,n>>1,step); + mdct_butterflies(in,n>>1,shift); + mdct_bitreverse(in,n,shift); + mdct_step7(in,n,step); + mdct_step8(in,n,step); +} - iX = in+n2-3; - T = sincos_lookup0; +void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right){ + int i; + n>>=2; + in+=1; - do{ - REG_TYPE r0= iX[0]; - REG_TYPE r2= iX[2]; - XPROD31( r0, r2, T[0], T[1], &iX[0], &iX[2] ); T+=step; - iX-=4; - }while(iX>=in+n4); - do{ - REG_TYPE r0= iX[0]; - REG_TYPE r2= iX[2]; - XPROD31( r0, r2, T[1], T[0], &iX[0], &iX[2] ); T-=step; - iX-=4; - }while(iX>=in); - - iX = in+n2-4; - oX = in; - T = sincos_lookup0; - do{ - REG_TYPE ri0= iX[0]; - REG_TYPE ri2= iX[2]; - REG_TYPE ro0= oX[0]; - REG_TYPE ro2= oX[2]; - - XNPROD31( ro2, ro0, T[1], T[0], &iX[0], &iX[2] ); T+=step; - XNPROD31( ri2, ri0, T[0], T[1], &oX[0], &oX[2] ); - - iX-=4; - oX+=4; - }while(iX>=in+n4); + for(i=0;i<n;i++) + right[i]=in[i<<1]; +} - mdct_butterflies(in,n2,shift); - mdct_bitreverse(out,n,step,shift); +void mdct_unroll_lap(int n0,int n1,int lW,int W, + DATA_TYPE *in, + DATA_TYPE *right, + LOOKUP_T *w0, + LOOKUP_T *w1, + ogg_int16_t *out, + int step){ + + DATA_TYPE *l=in+(W&&lW ? n1>>1 : n0>>1); + DATA_TYPE *r=right+(lW ? n1>>2 : n0>>2); + DATA_TYPE *post; + LOOKUP_T *wR; + LOOKUP_T *wL; - /* rotate + window */ - - step>>=2; - { - DATA_TYPE *oX1=out+n2+n4; - DATA_TYPE *oX2=out+n2+n4; - DATA_TYPE *iX =out; - - switch(step) { - default: { - T=(step>=4)?(sincos_lookup0+(step>>1)):sincos_lookup1; - do{ - oX1-=4; - XPROD31( iX[0], -iX[1], T[0], T[1], &oX1[3], &oX2[0] ); T+=step; - XPROD31( iX[2], -iX[3], T[0], T[1], &oX1[2], &oX2[1] ); T+=step; - XPROD31( iX[4], -iX[5], T[0], T[1], &oX1[1], &oX2[2] ); T+=step; - XPROD31( iX[6], -iX[7], T[0], T[1], &oX1[0], &oX2[3] ); T+=step; - oX2+=4; - iX+=8; - }while(iX<oX1); - break; - } - - case 1: { - /* linear interpolation between table values: offset=0.5, step=1 */ - REG_TYPE t0,t1,v0,v1; - T = sincos_lookup0; - V = sincos_lookup1; - t0 = (*T++)>>1; - t1 = (*T++)>>1; - do{ - oX1-=4; - - t0 += (v0 = (*V++)>>1); - t1 += (v1 = (*V++)>>1); - XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); - v0 += (t0 = (*T++)>>1); - v1 += (t1 = (*T++)>>1); - XPROD31( iX[2], -iX[3], v0, v1, &oX1[2], &oX2[1] ); - t0 += (v0 = (*V++)>>1); - t1 += (v1 = (*V++)>>1); - XPROD31( iX[4], -iX[5], t0, t1, &oX1[1], &oX2[2] ); - v0 += (t0 = (*T++)>>1); - v1 += (t1 = (*T++)>>1); - XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); - - oX2+=4; - iX+=8; - }while(iX<oX1); - break; - } - - case 0: { - /* linear interpolation between table values: offset=0.25, step=0.5 */ - REG_TYPE t0,t1,v0,v1,q0,q1; - T = sincos_lookup0; - V = sincos_lookup1; - t0 = *T++; - t1 = *T++; - do{ - oX1-=4; - - v0 = *V++; - v1 = *V++; - t0 += (q0 = (v0-t0)>>2); - t1 += (q1 = (v1-t1)>>2); - XPROD31( iX[0], -iX[1], t0, t1, &oX1[3], &oX2[0] ); - t0 = v0-q0; - t1 = v1-q1; - XPROD31( iX[2], -iX[3], t0, t1, &oX1[2], &oX2[1] ); - - t0 = *T++; - t1 = *T++; - v0 += (q0 = (t0-v0)>>2); - v1 += (q1 = (t1-v1)>>2); - XPROD31( iX[4], -iX[5], v0, v1, &oX1[1], &oX2[2] ); - v0 = t0-q0; - v1 = t1-q1; - XPROD31( iX[6], -iX[7], v0, v1, &oX1[0], &oX2[3] ); - - oX2+=4; - iX+=8; - }while(iX<oX1); - break; - } + if(lW){ + if(!W){ + /* copy pre-lap from previous frame */ + post=right+(n0>>2); + do{ + out++ = CLIP_TO_15((*--r)>>9); + out+=step; + }while(r>post); } + } + + /* cross-lap; two halves due to wrap-around */ + if(W && lW){ + wL=w1; + wR=w1+(n1>>1); + post=right+(n1>>2); + }else{ + wL=w0; + wR=w0+(n0>>1); + post=right+(n0>>2); + } - iX=out+n2+n4; - oX1=out+n4; - oX2=oX1; - - do{ - oX1-=4; - iX-=4; - - oX2[0] = -(oX1[3] = iX[3]); - oX2[1] = -(oX1[2] = iX[2]); - oX2[2] = -(oX1[1] = iX[1]); - oX2[3] = -(oX1[0] = iX[0]); - - oX2+=4; - }while(oX2<iX); - - iX=out+n2+n4; - oX1=out+n2+n4; - oX2=out+n2; - + do{ + l-=2; + *out = CLIP_TO_15((MULT31(*--r,*--wR) + MULT31(*l,*wL++))>>9); + out+=step; + }while(r>right); + do{ + *out = CLIP_TO_15((MULT31(*r++,*--wR) - MULT31(*l,*wL++))>>9); + out+=step; + l+=2; + }while(r<post); + + if(!lW && W){ + /* post-lap from current frame */ + post=in+(n1>>1); do{ - oX1-=4; - oX1[0]= iX[3]; - oX1[1]= iX[2]; - oX1[2]= iX[1]; - oX1[3]= iX[0]; - iX+=4; - }while(oX1>oX2); + *out = CLIP_TO_15((-*l)>>15); + out+=step; + l+=2; + }while(l<post); } } + @@ -6,7 +6,7 @@ * GOVERNED BY A BSD-STYLE SOURCE LICENSE INCLUDED WITH THIS SOURCE * * IN 'COPYING'. PLEASE READ THESE TERMS BEFORE DISTRIBUTING. * * * - * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2002 * + * THE OggVorbis 'TREMOR' SOURCE CODE IS (C) COPYRIGHT 1994-2003 * * BY THE Xiph.Org FOUNDATION http://www.xiph.org/ * * * ******************************************************************** @@ -34,8 +34,15 @@ #define cPI1_8 (0x7641af3d) #endif -extern void mdct_forward(int n, DATA_TYPE *in, DATA_TYPE *out); -extern void mdct_backward(int n, DATA_TYPE *in, DATA_TYPE *out); +extern void mdct_backward(int n, DATA_TYPE *in); +extern void mdct_shift_right(int n, DATA_TYPE *in, DATA_TYPE *right); +extern void mdct_unroll_lap(int n0,int n1,int lW,int W, + DATA_TYPE *in, + DATA_TYPE *right, + LOOKUP_T *w0, + LOOKUP_T *w1, + ogg_int16_t *out, + int step); #endif |