summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author sonarnerd <sonarnerd@0c269be4-1314-0410-8aa9-9f06e86f4224> 2008-10-26 18:17:13 +0000
committer sonarnerd <sonarnerd@0c269be4-1314-0410-8aa9-9f06e86f4224> 2008-10-26 18:17:13 +0000
commit c49ed40f271197bd1985756ab8e7f79732b37e48 (patch)
tree 5099764996a0c791d89cf9f23269a29d055cd0ee
parent a27a920f7c66aa326961f405e59150e4fc00cdfc (diff)
download jack1-c49ed40f271197bd1985756ab8e7f79732b37e48.tar.gz
Add scaling and clipping to SSE float-int and int-float conversions
git-svn-id: svn+ssh://jackaudio.org/trunk/jack@3053 0c269be4-1314-0410-8aa9-9f06e86f4224
-rw-r--r-- configure.ac 2
-rw-r--r-- jack/intsimd.h 4
-rw-r--r-- libjack/simd.c 59
3 files changed, 49 insertions, 16 deletions
diff --git a/configure.ac b/configure.ac
index e81e7ea..e8f523e 100644
--- a/configure.ac
+++ b/configure.ac
@@ -17,7 +17,7 @@ dnl changes are made
dnl ---
JACK_MAJOR_VERSION=0
JACK_MINOR_VERSION=115
-JACK_MICRO_VERSION=0
+JACK_MICRO_VERSION=1
dnl ---
dnl HOWTO: updating the jack protocol version
diff --git a/jack/intsimd.h b/jack/intsimd.h
index 29b3b7c..2377bd1 100644
--- a/jack/intsimd.h
+++ b/jack/intsimd.h
@@ -45,8 +45,8 @@ void x86_3dnow_copyf (float *, const float *, int);
void x86_3dnow_add2f (float *, const float *, int);
void x86_sse_copyf (float *, const float *, int);
void x86_sse_add2f (float *, const float *, int);
-void x86_sse_f2i (int *, const float *, int);
-void x86_sse_i2f (float *, const int *, int);
+void x86_sse_f2i (int *, const float *, int, float);
+void x86_sse_i2f (float *, const int *, int, float);
#endif /* ARCH_X86 */
diff --git a/libjack/simd.c b/libjack/simd.c
index def5656..6fabb85 100644
--- a/libjack/simd.c
+++ b/libjack/simd.c
@@ -329,20 +329,40 @@ sse_nonalign:
}
}
-void x86_sse_f2i (int *dest, const float *src, int length)
+void x86_sse_f2i (int *dest, const float *src, int length, float scale)
{
int i;
+ float max[4] __attribute__((aligned(16))) =
+ { -1.0F, -1.0F, -1.0F, -1.0F };
+ float min[4] __attribute__((aligned(16))) =
+ { 1.0F, 1.0F, 1.0F, 1.0F };
+ float s[4] __attribute__((aligned(16)));
+
+ s[0] = s[1] = s[2] = s[3] = scale;
+ asm volatile (
+ "movaps %0, %%xmm4\n\t" \
+ "movaps %1, %%xmm5\n\t" \
+ "movaps %2, %%xmm6\n\t"
+ :
+ : "m" (*max),
+ "m" (*min),
+ "m" (*s)
+ : "xmm4", "xmm5", "xmm6");
if (__builtin_expect((((long) dest & 0xf) || ((long) src & 0xf)), 0))
goto sse_nonalign;
for (i = 0; i < length; i += 4)
{
asm volatile (
- "cvtps2dq %1, %%xmm0\n\t" \
+ "movaps %1, %%xmm1\n\t" \
+ "maxps %%xmm4, %%xmm1\n\t" \
+ "minps %%xmm5, %%xmm1\n\t" \
+ "mulps %%xmm6, %%xmm1\n\t" \
+ "cvtps2dq %%xmm1, %%xmm0\n\t" \
"movdqa %%xmm0, %0\n\t"
: "=m" (dest[i])
: "m" (src[i])
- : "xmm0", "memory");
+ : "xmm0", "xmm1", "xmm4", "xmm5", "xmm6", "memory");
}
return;
@@ -350,19 +370,30 @@ sse_nonalign:
for (i = 0; i < length; i += 4)
{
asm volatile (
- "movups %1, %%xmm0\n\t" \
- "cvtps2dq %%xmm0, %%xmm1\n\t" \
- "movdqu %%xmm1, %0\n\t"
+ "movups %1, %%xmm1\n\t" \
+ "maxps %%xmm4, %%xmm1\n\t" \
+ "minps %%xmm5, %%xmm1\n\t" \
+ "mulps %%xmm6, %%xmm1\n\t" \
+ "cvtps2dq %%xmm1, %%xmm0\n\t" \
+ "movdqu %%xmm0, %0\n\t"
: "=m" (dest[i])
: "m" (src[i])
- : "xmm0", "xmm1", "memory");
+ : "xmm0", "xmm1", "xmm4", "xmm5", "xmm6", "memory");
}
}
-void x86_sse_i2f (float *dest, const int *src, int length)
+void x86_sse_i2f (float *dest, const int *src, int length, float scale)
{
int i;
+ float s[4] __attribute__((aligned(16)));
+
+ s[0] = s[1] = s[2] = s[3] = scale;
+ asm volatile (
+ "movaps %0, %%xmm4\n\t"
+ :
+ : "m" (*s)
+ : "xmm4" );
if (__builtin_expect((((long) dest & 0xf) || ((long) src & 0xf)), 0))
goto sse_nonalign;
@@ -370,10 +401,11 @@ void x86_sse_i2f (float *dest, const int *src, int length)
{
asm volatile (
"cvtdq2ps %1, %%xmm0\n\t" \
+ "mulps %%xmm4, %%xmm0\n\t" \
"movaps %%xmm0, %0\n\t"
: "=m" (dest[i])
: "m" (src[i])
- : "xmm0", "memory");
+ : "xmm0", "xmm4", "memory");
}
return;
@@ -381,12 +413,13 @@ sse_nonalign:
for (i = 0; i < length; i += 4)
{
asm volatile (
- "movdqu %1, %%xmm0\n\t" \
- "cvtdq2ps %%xmm0, %%xmm1\n\t" \
- "movups %%xmm1, %0\n\t"
+ "movdqu %1, %%xmm1\n\t" \
+ "cvtdq2ps %%xmm1, %%xmm0\n\t" \
+ "mulps %%xmm4, %%xmm0\n\t" \
+ "movups %%xmm0, %0\n\t"
: "=m" (dest[i])
: "m" (src[i])
- : "xmm0", "xmm1", "memory");
+ : "xmm0", "xmm1", "xmm4", "memory");
}
}