summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Timo Wischer <twischer@de.adit-jv.com>, 2018-11-20 16:47:37 +0100
committer: Filipe Coelho <falktx@falktx.com>, 2019-01-17 00:56:39 +0100
commit: b4ea23df6a16180a3d686cefffe7dd91fcbc9a27 (patch)
tree: 4a3be3308c19805696ae42565f57cd337d69a65e
parent: 244fc27e29d8523acbd6a166a8a11bab969a84b7 (diff)
download: jack2-b4ea23df6a16180a3d686cefffe7dd91fcbc9a27.tar.gz
memops: Align S24LE and S32LE to float conversion
Due to compiler optimization, this commit does not introduce any differences in the generated machine instructions.
Tested with GCC version 6.2.0 on ARM64 and x86_64.

Signed-off-by: Timo Wischer <twischer@de.adit-jv.com>
-rw-r--r-- common/memops.c 65
1 file changed, 46 insertions, 19 deletions
diff --git a/common/memops.c b/common/memops.c
index 290449b2..266b7b8f 100644
--- a/common/memops.c
+++ b/common/memops.c
@@ -29,6 +29,7 @@
#include <memory.h>
#include <stdlib.h>
#include <stdint.h>
+#include <stdbool.h>
#include <limits.h>
#ifdef __linux__
#include <endian.h>
@@ -412,9 +413,10 @@ void sample_move_d32_sS (char *dst, jack_default_audio_sample_t *src, unsigned l
}
-void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+static inline void sample_move_dS_s32s_signext (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip, const bool do_signext)
{
- const jack_default_audio_sample_t scaling = 1.0 / (SAMPLE_24BIT_SCALING << 8);
+ const jack_default_audio_sample_t scaling_divisor = do_signext ? (SAMPLE_24BIT_SCALING << 8) : SAMPLE_32BIT_SCALING;
+ const jack_default_audio_sample_t scaling = 1.0 / scaling_divisor;
#if defined (__ARM_NEON__) || defined (__ARM_NEON)
float32x4_t factor = vdupq_n_f32(scaling);
@@ -437,9 +439,11 @@ void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsign
break;
}
src128 = vreinterpretq_s32_u8(vrev32q_u8(vreinterpretq_u8_s32(src128)));
- /* sign extension - left shift will be reverted by scaling */
- int32x4_t shifted = vshlq_n_s32(src128, 8);
- float32x4_t as_float = vcvtq_f32_s32(shifted);
+ if (do_signext) {
+ /* sign extension - left shift will be reverted by scaling */
+ src128 = vshlq_n_s32(src128, 8);
+ }
+ float32x4_t as_float = vcvtq_f32_s32(src128);
float32x4_t divided = vmulq_f32(as_float, factor);
vst1q_f32(dst, divided);
@@ -470,16 +474,26 @@ void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsign
x <<= 8;
x |= (unsigned char)(src[0]);
#endif
- /* sign extension - left shift will be reverted by scaling */
- *dst = (x << 8) * scaling;
+ if (do_signext) {
+ /* sign extension - left shift will be reverted by scaling */
+ x <<= 8;
+ }
+ *dst = x * scaling;
dst++;
src += src_skip;
}
}
-void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+{
+ sample_move_dS_s32s_signext (dst, src, nsamples, src_skip, true);
+}
+
+
+static inline void sample_move_dS_s32_signext (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip, const bool do_signext)
{
- const jack_default_audio_sample_t scaling = 1.0 / (SAMPLE_24BIT_SCALING << 8);
+ const jack_default_audio_sample_t scaling_divisor = do_signext ? (SAMPLE_24BIT_SCALING << 8) : SAMPLE_32BIT_SCALING;
+ const jack_default_audio_sample_t scaling = 1.0 / scaling_divisor;
#if defined (__SSE2__) && !defined (__sun__)
unsigned long unrolled = nsamples / 4;
@@ -495,11 +509,12 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne
int i4 = *((int *) src);
src+= src_skip;
- __m128i src = _mm_set_epi32(i4, i3, i2, i1);
- /* sign extension - left shift will be reverted by scaling */
- __m128i shifted = _mm_slli_epi32(src, 8);
-
- __m128 as_float = _mm_cvtepi32_ps(shifted);
+ __m128i src128 = _mm_set_epi32(i4, i3, i2, i1);
+ if (do_signext) {
+ /* sign extension - left shift will be reverted by scaling */
+ src128 = _mm_slli_epi32(src128, 8);
+ }
+ __m128 as_float = _mm_cvtepi32_ps(src128);
__m128 divided = _mm_mul_ps(as_float, factor);
_mm_storeu_ps(dst, divided);
@@ -526,9 +541,11 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne
src128 = vld1q_lane_s32((int32_t*)(src+3*src_skip), src128, 3);
break;
}
- /* sign extension - left shift will be reverted by scaling */
- int32x4_t shifted = vshlq_n_s32(src128, 8);
- float32x4_t as_float = vcvtq_f32_s32(shifted);
+ if (do_signext) {
+ /* sign extension - left shift will be reverted by scaling */
+ src128 = vshlq_n_s32(src128, 8);
+ }
+ float32x4_t as_float = vcvtq_f32_s32(src128);
float32x4_t divided = vmulq_f32(as_float, factor);
vst1q_f32(dst, divided);
@@ -541,13 +558,23 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne
/* ALERT: signed sign-extension portability !!! */
while (nsamples--) {
- /* sign extension - left shift will be reverted by scaling */
- *dst = (*((int *) src) << 8) * scaling;
+ int src32 = *((int *) src);
+ if (do_signext) {
+ /* sign extension - left shift will be reverted by scaling */
+ src32 <<= 8;
+ }
+ *dst = src32 * scaling;
dst++;
src += src_skip;
}
}
+void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip)
+{
+ sample_move_dS_s32_signext (dst, src, nsamples, src_skip, true);
+}
+
+
void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state)
{
#if defined (__ARM_NEON__) || defined (__ARM_NEON)