diff options
author | sonarnerd <sonarnerd@0c269be4-1314-0410-8aa9-9f06e86f4224> | 2008-10-26 18:17:13 +0000 |
---|---|---|
committer | sonarnerd <sonarnerd@0c269be4-1314-0410-8aa9-9f06e86f4224> | 2008-10-26 18:17:13 +0000 |
commit | c49ed40f271197bd1985756ab8e7f79732b37e48 (patch) | |
tree | 5099764996a0c791d89cf9f23269a29d055cd0ee | |
parent | a27a920f7c66aa326961f405e59150e4fc00cdfc (diff) | |
download | jack1-c49ed40f271197bd1985756ab8e7f79732b37e48.tar.gz |
Add scaling and clipping to SSE float-int and int-float conversions
git-svn-id: svn+ssh://jackaudio.org/trunk/jack@3053 0c269be4-1314-0410-8aa9-9f06e86f4224
-rw-r--r-- | configure.ac | 2 | ||||
-rw-r--r-- | jack/intsimd.h | 4 | ||||
-rw-r--r-- | libjack/simd.c | 59 |
3 files changed, 49 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac index e81e7ea..e8f523e 100644 --- a/configure.ac +++ b/configure.ac @@ -17,7 +17,7 @@ dnl changes are made dnl --- JACK_MAJOR_VERSION=0 JACK_MINOR_VERSION=115 -JACK_MICRO_VERSION=0 +JACK_MICRO_VERSION=1 dnl --- dnl HOWTO: updating the jack protocol version diff --git a/jack/intsimd.h b/jack/intsimd.h index 29b3b7c..2377bd1 100644 --- a/jack/intsimd.h +++ b/jack/intsimd.h @@ -45,8 +45,8 @@ void x86_3dnow_copyf (float *, const float *, int); void x86_3dnow_add2f (float *, const float *, int); void x86_sse_copyf (float *, const float *, int); void x86_sse_add2f (float *, const float *, int); -void x86_sse_f2i (int *, const float *, int); -void x86_sse_i2f (float *, const int *, int); +void x86_sse_f2i (int *, const float *, int, float); +void x86_sse_i2f (float *, const int *, int, float); #endif /* ARCH_X86 */ diff --git a/libjack/simd.c b/libjack/simd.c index def5656..6fabb85 100644 --- a/libjack/simd.c +++ b/libjack/simd.c @@ -329,20 +329,40 @@ sse_nonalign: } } -void x86_sse_f2i (int *dest, const float *src, int length) +void x86_sse_f2i (int *dest, const float *src, int length, float scale) { int i; + float max[4] __attribute__((aligned(16))) = + { -1.0F, -1.0F, -1.0F, -1.0F }; + float min[4] __attribute__((aligned(16))) = + { 1.0F, 1.0F, 1.0F, 1.0F }; + float s[4] __attribute__((aligned(16))); + + s[0] = s[1] = s[2] = s[3] = scale; + asm volatile ( + "movaps %0, %%xmm4\n\t" \ + "movaps %1, %%xmm5\n\t" \ + "movaps %2, %%xmm6\n\t" + : + : "m" (*max), + "m" (*min), + "m" (*s) + : "xmm4", "xmm5", "xmm6"); if (__builtin_expect((((long) dest & 0xf) || ((long) src & 0xf)), 0)) goto sse_nonalign; for (i = 0; i < length; i += 4) { asm volatile ( - "cvtps2dq %1, %%xmm0\n\t" \ + "movaps %1, %%xmm1\n\t" \ + "maxps %%xmm4, %%xmm1\n\t" \ + "minps %%xmm5, %%xmm1\n\t" \ + "mulps %%xmm6, %%xmm1\n\t" \ + "cvtps2dq %%xmm1, %%xmm0\n\t" \ "movdqa %%xmm0, %0\n\t" : "=m" (dest[i]) : "m" (src[i]) - : "xmm0", "memory"); + : "xmm0", "xmm1", "xmm4", "xmm5", "xmm6", "memory"); } return; @@ -350,19 +370,30 @@ sse_nonalign: for (i = 0; i < length; i += 4) { asm volatile ( - "movups %1, %%xmm0\n\t" \ - "cvtps2dq %%xmm0, %%xmm1\n\t" \ - "movdqu %%xmm1, %0\n\t" + "movups %1, %%xmm1\n\t" \ + "maxps %%xmm4, %%xmm1\n\t" \ + "minps %%xmm5, %%xmm1\n\t" \ + "mulps %%xmm6, %%xmm1\n\t" \ + "cvtps2dq %%xmm1, %%xmm0\n\t" \ + "movdqu %%xmm0, %0\n\t" : "=m" (dest[i]) : "m" (src[i]) - : "xmm0", "xmm1", "memory"); + : "xmm0", "xmm1", "xmm4", "xmm5", "xmm6", "memory"); } } -void x86_sse_i2f (float *dest, const int *src, int length) +void x86_sse_i2f (float *dest, const int *src, int length, float scale) { int i; + float s[4] __attribute__((aligned(16))); + + s[0] = s[1] = s[2] = s[3] = scale; + asm volatile ( + "movaps %0, %%xmm4\n\t" + : + : "m" (*s) + : "xmm4" ); if (__builtin_expect((((long) dest & 0xf) || ((long) src & 0xf)), 0)) goto sse_nonalign; @@ -370,10 +401,11 @@ void x86_sse_i2f (float *dest, const int *src, int length) { asm volatile ( "cvtdq2ps %1, %%xmm0\n\t" \ + "mulps %%xmm4, %%xmm0\n\t" \ "movaps %%xmm0, %0\n\t" : "=m" (dest[i]) : "m" (src[i]) - : "xmm0", "memory"); + : "xmm0", "xmm4", "memory"); } return; @@ -381,12 +413,13 @@ sse_nonalign: for (i = 0; i < length; i += 4) { asm volatile ( - "movdqu %1, %%xmm0\n\t" \ - "cvtdq2ps %%xmm0, %%xmm1\n\t" \ - "movups %%xmm1, %0\n\t" + "movdqu %1, %%xmm1\n\t" \ + "cvtdq2ps %%xmm1, %%xmm0\n\t" \ + "mulps %%xmm4, %%xmm0\n\t" \ + "movups %%xmm0, %0\n\t" : "=m" (dest[i]) : "m" (src[i]) - : "xmm0", "xmm1", "memory"); + : "xmm0", "xmm1", "xmm4", "memory"); } } |