memops: Align float to S24LE and S32LE conversion

Signed-off-by: Timo Wischer <twischer@de.adit-jv.com>
author: Timo Wischer <twischer@de.adit-jv.com> 2018-11-16 17:08:20 +0100
committer: Filipe Coelho <falktx@falktx.com> 2019-01-17 00:56:39 +0100
commit: 4455fe020ca890d8c6b24f89a7bf91f9044b9d33 (patch)
tree: 553a4081b6f5aa5dc0db015086ccb1733f5805bd
parent: a82f3f2fb44e236e5bf3b207349ca000cd01185c (diff)
download: jack2-4455fe020ca890d8c6b24f89a7bf91f9044b9d33.tar.gz
1 files changed, 29 insertions, 19 deletions
diff --git a/common/memops.c b/common/memops.c
index 5d9f229d..17a2e5e0 100644
--- a/common/memops.c
+++ b/common/memops.c
@@ -83,8 +83,6 @@
 
 #define SAMPLE_24BIT_MAX  8388607  
 #define SAMPLE_24BIT_MIN  -8388607 
-#define SAMPLE_24BIT_MAX_F  8388607.0f  
-#define SAMPLE_24BIT_MIN_F  -8388607.0f 
 
 #define SAMPLE_16BIT_MAX  32767
 #define SAMPLE_16BIT_MIN  -32767
@@ -128,13 +126,13 @@
 	        (d) = f_round ((s));\
 	}
 
-#define float_24u32(s, d) \
+#define float_32(s, d, scale) \
 	if ((s) <= NORMALIZED_FLOAT_MIN) {\
-		(d) = SAMPLE_24BIT_MIN;\
+		(d) = -(scale);\
 	} else if ((s) >= NORMALIZED_FLOAT_MAX) {\
-		(d) = SAMPLE_24BIT_MAX;\
+		(d) = (scale);\
 	} else {\
-		(d) = f_round ((s) * SAMPLE_24BIT_SCALING);\
+		(d) = f_round ((s) * (scale));\
 	}
 
 
@@ -182,13 +180,13 @@ static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max)
 	return vminq_f32(max, vmaxq_f32(s, min));
 }
 
-static inline int32x4_t float_24_neon(float32x4_t s)
+static inline int32x4_t float_32_neon(float32x4_t s, const int32_t scaling)
 {
 	const float32x4_t upper_bound = vdupq_n_f32(NORMALIZED_FLOAT_MAX);
 	const float32x4_t lower_bound = vdupq_n_f32(NORMALIZED_FLOAT_MIN);
 
 	float32x4_t clipped = clip(s, lower_bound, upper_bound);
-	float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(SAMPLE_24BIT_SCALING));
+	float32x4_t scaled = vmulq_f32(clipped, vdupq_n_f32(scaling));
 	return vcvtq_s32_f32(scaled);
 }
 
@@ -256,7 +254,7 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign
 
 /* functions for native integer sample data */
 
-void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+static inline void sample_move_d32scal_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
 {
 #if defined (__ARM_NEON__) || defined (__ARM_NEON)
 	unsigned long unrolled = nsamples / 4;
@@ -264,7 +262,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 
 	while (unrolled--) {
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, scaling);
 		converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
 
 		switch(dst_skip) {
@@ -287,7 +285,7 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 
 	while (nsamples--) {
 
-		float_24u32 (*src, z);
+		float_32 (*src, z, scaling);
 
 #if __BYTE_ORDER == __LITTLE_ENDIAN
 		dst[0]=(char)(z>>24);
@@ -303,12 +301,18 @@ void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsign
 		dst += dst_skip;
 		src++;
 	}
-}	
+}
 
-void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	sample_move_d32scal_sSs (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
+}
+
+
+static inline void sample_move_d32scal_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state, const int32_t scaling)
 {
 #if defined (__SSE2__) && !defined (__sun__)
-	__m128 int_max = _mm_set1_ps(SAMPLE_24BIT_MAX_F);
+	__m128 int_max = _mm_set1_ps(scaling);
 	__m128 int_min = _mm_sub_ps(_mm_setzero_ps(), int_max);
 	__m128 factor = int_max;
 
@@ -361,7 +365,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
 
 	while (unrolled--) {
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, scaling);
 
 		switch(dst_skip) {
 			case 4:
@@ -382,12 +386,18 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne
 
 #if !defined (__SSE2__)
 	while (nsamples--) {
-		float_24u32 (*src, *((int32_t*) dst));
+		float_32 (*src, *((int32_t*) dst), scaling);
 		dst += dst_skip;
 		src++;
 	}
 #endif
-}	
+}
+
+void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
+{
+	sample_move_d32scal_sS (dst, src, nsamples, dst_skip, state, SAMPLE_24BIT_SCALING);
+}
+
 
 void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
 {
@@ -533,7 +543,7 @@ void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned
 		int i;
 		int32_t z[4];
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
 		converted = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(converted)));
 		vst1q_s32(z, converted);
 
@@ -604,7 +614,7 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l
 		int i;
 		int32_t z[4];
 		float32x4_t samples = vld1q_f32(src);
-		int32x4_t converted = float_24_neon(samples);
+		int32x4_t converted = float_32_neon(samples, SAMPLE_24BIT_SCALING);
 		vst1q_s32(z, converted);
 
 		for (i = 0; i != 4; ++i) {
author	Timo Wischer <twischer@de.adit-jv.com>	2018-11-16 17:08:20 +0100
committer	Filipe Coelho <falktx@falktx.com>	2019-01-17 00:56:39 +0100
commit	4455fe020ca890d8c6b24f89a7bf91f9044b9d33 (patch)
tree	553a4081b6f5aa5dc0db015086ccb1733f5805bd
parent	a82f3f2fb44e236e5bf3b207349ca000cd01185c (diff)
download	jack2-4455fe020ca890d8c6b24f89a7bf91f9044b9d33.tar.gz