diff options
author | twischer-adit <twischer@de.adit-jv.com> | 2019-01-16 10:41:53 +0100 |
---|---|---|
committer | Filipe Coelho <falktx@falktx.com> | 2019-01-16 10:41:53 +0100 |
commit | 04bdd8a6358ead60b4da88d1689e926434fc160a (patch) | |
tree | 0cdeeaca043b51f436feedd8896eef9db8eb9957 | |
parent | 9fe56af980e4a7279cfe826a2b1f8bf1f799763a (diff) | |
download | jack2-04bdd8a6358ead60b4da88d1689e926434fc160a.tar.gz |
arm64: Enable simd implementations (#398)
GCC defines __ARM_NEON__ for Advanced SIMD (NEON) on ARMv7 and __ARM_NEON for Advanced SIMD on
ARMv8. Therefore, also check for __ARM_NEON so that the SIMD implementations are enabled on arm64.
Signed-off-by: Timo Wischer <twischer@de.adit-jv.com>
-rw-r--r-- | common/JackAudioPort.cpp | 6 | ||||
-rw-r--r-- | common/memops.c | 28 | ||||
-rw-r--r-- | example-clients/simdtests.cpp | 6 |
3 files changed, 22 insertions, 18 deletions
diff --git a/common/JackAudioPort.cpp b/common/JackAudioPort.cpp index 43f5c5fe..ac8943ec 100644 --- a/common/JackAudioPort.cpp +++ b/common/JackAudioPort.cpp @@ -28,7 +28,7 @@ Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. #include <Accelerate/Accelerate.h> #elif defined (__SSE__) && !defined (__sun__) #include <xmmintrin.h> -#elif defined (__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) #include <arm_neon.h> #endif @@ -56,7 +56,7 @@ static inline void MixAudioBuffer(jack_default_audio_sample_t* mixbuffer, jack_d mixbuffer += 4; buffer += 4; frames_group--; - #elif defined (__ARM_NEON__) + #elif defined (__ARM_NEON__) || defined (__ARM_NEON) float32x4_t vec = vaddq_f32(vld1q_f32(mixbuffer), vld1q_f32(buffer)); vst1q_f32(mixbuffer, vec); @@ -125,7 +125,7 @@ static void AudioBufferMixdown(void* mixbuffer, void** src_buffers, int src_coun for (jack_nframes_t i = 0; i != remaining_frames; ++i) { target[i] = source[i]; } -#elif defined (__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) jack_nframes_t frames_group = nframes / 4; jack_nframes_t remaining_frames = nframes % 4; diff --git a/common/memops.c b/common/memops.c index fa4193eb..6e8d273a 100644 --- a/common/memops.c +++ b/common/memops.c @@ -42,7 +42,7 @@ #endif #endif -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) #include <arm_neon.h> #endif @@ -198,7 +198,7 @@ static inline __m128i float_24_sse(__m128 s) #endif -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) static inline float32x4_t clip(float32x4_t s, float32x4_t min, float32x4_t max) { @@ -281,7 +281,7 @@ void sample_move_dS_floatLE (char *dst, jack_default_audio_sample_t *src, unsign void sample_move_d32u24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; 
nsamples = nsamples & 3; @@ -380,7 +380,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne src++; } -#elif defined(__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; nsamples = nsamples & 3; @@ -417,7 +417,7 @@ void sample_move_d32u24_sS (char *dst, jack_default_audio_sample_t *src, unsigne void sample_move_dS_s32u24s (jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip) { -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING); unsigned long unrolled = nsamples / 4; while (unrolled--) { @@ -506,7 +506,7 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne dst += 4; } nsamples = nsamples & 3; -#elif defined(__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; float32x4_t factor = vdupq_n_f32(1.0 / SAMPLE_24BIT_SCALING); while (unrolled--) { @@ -548,7 +548,7 @@ void sample_move_dS_s32u24 (jack_default_audio_sample_t *dst, char *src, unsigne void sample_move_d24_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; while (unrolled--) { int i; @@ -619,7 +619,7 @@ void sample_move_d24_sS (char *dst, jack_default_audio_sample_t *src, unsigned l nsamples -= 4; src += 4; } -#elif defined(__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; while (unrolled--) { int i; @@ -655,7 +655,7 @@ void sample_move_dS_s24s (jack_default_audio_sample_t *dst, char *src, unsigned { const jack_default_audio_sample_t scaling = 1.0/SAMPLE_24BIT_SCALING; -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) // we shift 8 to the right by dividing by 
256.0 -> no sign extra handling const float32x4_t vscaling = vdupq_n_f32(scaling/256.0); int32_t x[4]; @@ -740,7 +740,7 @@ void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned l dst += 4; nsamples -= 4; } -#elif defined(__ARM_NEON__) +#elif defined (__ARM_NEON__) || defined (__ARM_NEON) // we shift 8 to the right by dividing by 256.0 -> no sign extra handling const float32x4_t vscaling = vdupq_n_f32(scaling/256.0); int32_t x[4]; @@ -787,7 +787,7 @@ void sample_move_dS_s24 (jack_default_audio_sample_t *dst, char *src, unsigned l void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; nsamples = nsamples & 3; @@ -838,7 +838,7 @@ void sample_move_d16_sSs (char *dst, jack_default_audio_sample_t *src, unsigned void sample_move_d16_sS (char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state) { -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) unsigned long unrolled = nsamples / 4; nsamples = nsamples & 3; @@ -1017,7 +1017,7 @@ void sample_move_dS_s16s (jack_default_audio_sample_t *dst, char *src, unsigned { short z; const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING; -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) const float32x4_t vscaling = vdupq_n_f32(scaling); unsigned long unrolled = nsamples / 4; while (unrolled--) { @@ -1069,7 +1069,7 @@ void sample_move_dS_s16 (jack_default_audio_sample_t *dst, char *src, unsigned l { /* ALERT: signed sign-extension portability !!! 
*/ const jack_default_audio_sample_t scaling = 1.0/SAMPLE_16BIT_SCALING; -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) const float32x4_t vscaling = vdupq_n_f32(scaling); unsigned long unrolled = nsamples / 4; while (unrolled--) { diff --git a/example-clients/simdtests.cpp b/example-clients/simdtests.cpp index b74d50aa..ad7db69e 100644 --- a/example-clients/simdtests.cpp +++ b/example-clients/simdtests.cpp @@ -40,7 +40,7 @@ #endif #endif -#ifdef __ARM_NEON__ +#if defined (__ARM_NEON__) || defined (__ARM_NEON) #include <arm_neon.h> #endif @@ -63,6 +63,10 @@ namespace origerated { #undef __ARM_NEON__ #endif +#ifdef __ARM_NEON +#undef __ARM_NEON +#endif + #include "../common/memops.c" } |