diff options
author | Andreas Müller <schnitzeltony@googlemail.com> | 2017-01-15 20:52:20 +0100 |
---|---|---|
committer | Andreas Müller <schnitzeltony@googlemail.com> | 2017-02-23 16:09:24 +0100 |
commit | c32b823860bc8e887774ab10bc3e9bd76e85e3f3 (patch) | |
tree | 69c3e6df86b41cca9fd9fc1a7c3fee4ebd28d1bc /example-clients | |
parent | 77bb8be12e0d856fbc004cf57185ab36a2df04c2 (diff) | |
download | jack2-c32b823860bc8e887774ab10bc3e9bd76e85e3f3.tar.gz |
jack_simdtests: add application checking accurracy and performance of SIMD optimizations
Signed-off-by: Andreas Müller <schnitzeltony@googlemail.com>
Diffstat (limited to 'example-clients')
-rw-r--r-- | example-clients/simdtests.cpp | 390 | ||||
-rw-r--r-- | example-clients/wscript | 3 |
2 files changed, 392 insertions, 1 deletions
diff --git a/example-clients/simdtests.cpp b/example-clients/simdtests.cpp new file mode 100644 index 00000000..b74d50aa --- /dev/null +++ b/example-clients/simdtests.cpp @@ -0,0 +1,390 @@ +/* + * simdtests.c -- test accuraccy and performance of simd optimizations + * + * Copyright (C) 2017 Andreas Mueller. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. + */ + +/* We must include all headers memops.c includes to avoid trouble with + * out namespace game below. + */ +#include <stdio.h> +#include <string.h> +#include <math.h> +#include <memory.h> +#include <stdlib.h> +#include <stdint.h> +#include <limits.h> +#ifdef __linux__ +#include <endian.h> +#endif +#include "memops.h" + +#if defined (__SSE2__) && !defined (__sun__) +#include <emmintrin.h> +#ifdef __SSE4_1__ +#include <smmintrin.h> +#endif +#endif + +#ifdef __ARM_NEON__ +#include <arm_neon.h> +#endif + +// our additional headers +#include <time.h> + +/* Dirty: include mempos.c twice the second time with SIMD disabled + * so we can compare aceelerated non accelerated + */ +namespace accelerated { +#include "../common/memops.c" +} + +namespace origerated { +#ifdef __SSE2__ +#undef __SSE2__ +#endif + +#ifdef __ARM_NEON__ +#undef __ARM_NEON__ +#endif + +#include "../common/memops.c" +} + +// define conversion function types +typedef void (*t_jack_to_integer)( + char *dst, + jack_default_audio_sample_t *src, + unsigned long nsamples, + unsigned long dst_skip, + dither_state_t *state); + +typedef void (*t_integer_to_jack)( + jack_default_audio_sample_t *dst, + char *src, + unsigned long nsamples, + unsigned long src_skip); + +// define/setup test case data +typedef struct test_case_data { + uint32_t frame_size; + uint32_t sample_size; + bool reverse; + t_jack_to_integer jack_to_integer_accel; + t_jack_to_integer jack_to_integer_orig; + t_integer_to_jack integer_to_jack_accel; + t_integer_to_jack integer_to_jack_orig; + dither_state_t *ditherstate; + const char *name; +} test_case_data_t; + +test_case_data_t test_cases[] = { + { + 4, + 3, + true, + accelerated::sample_move_d32u24_sSs, + origerated::sample_move_d32u24_sSs, + accelerated::sample_move_dS_s32u24s, + origerated::sample_move_dS_s32u24s, + NULL, + "32u24s" }, + { + 4, + 3, + false, + accelerated::sample_move_d32u24_sS, + origerated::sample_move_d32u24_sS, + accelerated::sample_move_dS_s32u24, + origerated::sample_move_dS_s32u24, + NULL, + "32u24" }, + { + 3, + 3, + true, + accelerated::sample_move_d24_sSs, + origerated::sample_move_d24_sSs, + accelerated::sample_move_dS_s24s, + origerated::sample_move_dS_s24s, + NULL, + "24s" }, + { + 3, + 3, + false, + accelerated::sample_move_d24_sS, + origerated::sample_move_d24_sS, + accelerated::sample_move_dS_s24, + origerated::sample_move_dS_s24, + NULL, + "24" }, + { + 2, + 2, + true, + accelerated::sample_move_d16_sSs, + origerated::sample_move_d16_sSs, + accelerated::sample_move_dS_s16s, + origerated::sample_move_dS_s16s, + NULL, + "16s" }, + { + 2, + 2, + false, + accelerated::sample_move_d16_sS, + origerated::sample_move_d16_sS, + accelerated::sample_move_dS_s16, + origerated::sample_move_dS_s16, + NULL, + "16" }, +}; + +// we need to repeat for better accuracy at time measurement +const uint32_t retry_per_case = 1000; + +// setup test buffers +#define TESTBUFF_SIZE 1024 +jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE]; +// integer buffers: max 4 bytes per value / * 2 for stereo +char integerbuffer_accel[TESTBUFF_SIZE*4*2]; +char integerbuffer_orig[TESTBUFF_SIZE*4*2]; +// float buffers +jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE]; +jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE]; + +// comparing unsigned makes life easier +uint32_t extract_integer( + char* buff, + uint32_t offset, + uint32_t frame_size, + uint32_t sample_size, + bool big_endian) +{ + uint32_t retval = 0; + unsigned char* curr; + uint32_t mult = 1; + if(big_endian) { + curr = (unsigned char*)buff + offset + sample_size-1; + for(uint32_t i=0; i<sample_size; i++) { + retval += *(curr--) * mult; + mult*=256; + } + } + else { + curr = (unsigned char*)buff + offset + frame_size-sample_size; + for(uint32_t i=0; i<sample_size; i++) { + retval += *(curr++) * mult; + mult*=256; + } + } + return retval; +} + +int main(int argc, char *argv[]) +{ +// parse_arguments(argc, argv); + uint32_t maxerr_displayed = 10; + + // fill jackbuffer + for(int i=0; i<TESTBUFF_SIZE; i++) { + // ramp + jack_default_audio_sample_t value = + ((jack_default_audio_sample_t)((i % TESTBUFF_SIZE) - TESTBUFF_SIZE/2)) / (TESTBUFF_SIZE/2); + // force clipping + value *= 1.02; + jackbuffer_source[i] = value; + } + + for(uint32_t testcase=0; testcase<sizeof(test_cases)/sizeof(test_case_data_t); testcase++) { + // test mono/stereo + for(uint32_t channels=1; channels<=2; channels++) { + ////////////////////////////////////////////////////////////////////////////// + // jackfloat -> integer + + // clean target buffers + memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel)); + memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig)); + // accel + clock_t time_to_integer_accel = clock(); + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) + { + test_cases[testcase].jack_to_integer_accel( + integerbuffer_accel, + jackbuffer_source, + TESTBUFF_SIZE, + test_cases[testcase].frame_size*channels, + test_cases[testcase].ditherstate); + } + float timediff_to_integer_accel = ((float)(clock() - time_to_integer_accel)) / CLOCKS_PER_SEC; + // orig + clock_t time_to_integer_orig = clock(); + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) + { + test_cases[testcase].jack_to_integer_orig( + integerbuffer_orig, + jackbuffer_source, + TESTBUFF_SIZE, + test_cases[testcase].frame_size*channels, + test_cases[testcase].ditherstate); + } + float timediff_to_integer_orig = ((float)(clock() - time_to_integer_orig)) / CLOCKS_PER_SEC; + // output performance results + printf( + "JackFloat->Integer @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", + test_cases[testcase].name, + channels, + timediff_to_integer_orig, + timediff_to_integer_accel, + (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0); + uint32_t int_deviation_max = 0; + uint32_t int_error_count = 0; + // output error (avoid spam -> limit error lines per test case) + for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { + uint32_t sample_offset = sample*test_cases[testcase].frame_size*channels; + // compare both results + uint32_t intval_accel=extract_integer( + integerbuffer_accel, + sample_offset, + test_cases[testcase].frame_size, + test_cases[testcase].sample_size, +#if __BYTE_ORDER == __BIG_ENDIAN + !test_cases[testcase].reverse); +#else + test_cases[testcase].reverse); +#endif + uint32_t intval_orig=extract_integer( + integerbuffer_orig, + sample_offset, + test_cases[testcase].frame_size, + test_cases[testcase].sample_size, +#if __BYTE_ORDER == __BIG_ENDIAN + !test_cases[testcase].reverse); +#else + test_cases[testcase].reverse); +#endif + if(intval_accel != intval_orig) { + if(int_error_count<maxerr_displayed) { + printf("Value error sample %u:", sample); + printf(" Orig 0x"); + char formatstr[10]; + sprintf(formatstr, "%%0%uX", test_cases[testcase].sample_size*2); + printf(formatstr, intval_orig); + printf(" Accel 0x"); + printf(formatstr, intval_accel); + printf("\n"); + } + int_error_count++; + uint32_t int_deviation; + if(intval_accel > intval_orig) + int_deviation = intval_accel-intval_orig; + else + int_deviation = intval_orig-intval_accel; + if(int_deviation > int_deviation_max) + int_deviation_max = int_deviation; + } + } + printf( + "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n", + test_cases[testcase].name, + channels, + int_error_count, + int_deviation_max); + + ////////////////////////////////////////////////////////////////////////////// + // integer -> jackfloat + + // clean target buffers + memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel)); + memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig)); + // accel + clock_t time_to_float_accel = clock(); + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) + { + test_cases[testcase].integer_to_jack_accel( + jackfloatbuffer_accel, + integerbuffer_orig, + TESTBUFF_SIZE, + test_cases[testcase].frame_size*channels); + } + float timediff_to_float_accel = ((float)(clock() - time_to_float_accel)) / CLOCKS_PER_SEC; + // orig + clock_t time_to_float_orig = clock(); + for(uint32_t repetition=0; repetition<retry_per_case; repetition++) + { + test_cases[testcase].integer_to_jack_orig( + jackfloatbuffer_orig, + integerbuffer_orig, + TESTBUFF_SIZE, + test_cases[testcase].frame_size*channels); + } + float timediff_to_float_orig = ((float)(clock() - time_to_float_orig)) / CLOCKS_PER_SEC; + // output performance results + printf( + "Integer->JackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", + test_cases[testcase].name, + channels, + timediff_to_float_orig, + timediff_to_float_accel, + (timediff_to_float_orig/timediff_to_float_accel-1)*100.0); + jack_default_audio_sample_t float_deviation_max = 0.0; + uint32_t float_error_count = 0; + // output error (avoid spam -> limit error lines per test case) + for(uint32_t sample=0; sample<TESTBUFF_SIZE; sample++) { + // For easier estimation/readabilty we scale floats back to integer + jack_default_audio_sample_t sample_scaling; + switch(test_cases[testcase].sample_size) { + case 2: + sample_scaling = SAMPLE_16BIT_SCALING; + break; + default: + sample_scaling = SAMPLE_24BIT_SCALING; + break; + } + jack_default_audio_sample_t floatval_accel = jackfloatbuffer_accel[sample] * sample_scaling; + jack_default_audio_sample_t floatval_orig = jackfloatbuffer_orig[sample] * sample_scaling; + // compare both results + jack_default_audio_sample_t float_deviation; + if(floatval_accel > floatval_orig) + float_deviation = floatval_accel-floatval_orig; + else + float_deviation = floatval_orig-floatval_accel; + if(float_deviation > float_deviation_max) + float_deviation_max = float_deviation; + // deviation > half bit => error + if(float_deviation > 0.5) { + if(float_error_count<maxerr_displayed) { + printf("Value error sample %u:", sample); + printf(" Orig %8.1f Accel %8.1f\n", floatval_orig, floatval_accel); + } + float_error_count++; + } + } + printf( + "Integer->JackFloat @%7.7s/%u: Errors: %u Max deviation %f\n", + test_cases[testcase].name, + channels, + float_error_count, + float_deviation_max); + + printf("\n"); + } + } + return 0; +} diff --git a/example-clients/wscript b/example-clients/wscript index ba67614e..1b2f6748 100644 --- a/example-clients/wscript +++ b/example-clients/wscript @@ -28,7 +28,8 @@ example_programs = { 'jack_net_master' : 'netmaster.c', 'jack_latent_client' : 'latent_client.c', 'jack_midi_dump' : 'midi_dump.c', - 'jack_midi_latency_test' : 'midi_latency_test.c' + 'jack_midi_latency_test' : 'midi_latency_test.c', + 'jack_simdtests' : 'simdtests.cpp' } example_libs = { |