/* * simdtests.c -- test accuracy and performance of simd optimizations * * Copyright (C) 2017 Andreas Mueller. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ /* Overview: ../common/memops.c is included twice, once unchanged inside namespace accelerated and once inside namespace origerated after __SSE2__, __ARM_NEON__ and __ARM_NEON have been undefined. For a test case and channel count the program times both variants with clock(), prints the timings and the relative win, then prints an error count and the maximum deviation between the two outputs, first for JackFloat->Integer and then for Integer->JackFloat. NOTE(review): the use of namespaces means this has to be built as C++ even though the header above calls it simdtests.c - confirm against the build files. */ /* We must include all headers memops.c includes to avoid trouble with * our namespace game below. */ #include #include #include #include #include #include #include #ifdef __linux__ #include #endif #include "memops.h" #if defined (__SSE2__) && !defined (__sun__) #include #ifdef __SSE4_1__ #include #endif #endif #if defined (__ARM_NEON__) || defined (__ARM_NEON) #include #endif // our additional headers #include /* Dirty: include memops.c twice, the second time with SIMD disabled * so we can compare accelerated and non-accelerated */ namespace accelerated { #include "../common/memops.c" } namespace origerated { #ifdef __SSE2__ #undef __SSE2__ #endif #ifdef __ARM_NEON__ #undef __ARM_NEON__ #endif #ifdef __ARM_NEON #undef __ARM_NEON #endif #include "../common/memops.c" } // define conversion function types typedef void (*t_jack_to_integer)( char *dst, jack_default_audio_sample_t *src, unsigned long nsamples, unsigned long dst_skip, dither_state_t *state); typedef void (*t_integer_to_jack)( jack_default_audio_sample_t *dst, char *src, unsigned long nsamples, unsigned long src_skip); // define/setup test case data /* test_case_data describes one pair of conversions under test. frame_size / sample_size: presumably the bytes occupied per sample in the integer buffer versus the bytes that carry data (the table uses 4/3 for "32u24", 3/3 for "24", 2/2 for "16") - TODO confirm at the use site. reverse: true for the table entries whose converter names end in "s". The four function pointers hold the accelerated and original converter for each direction; name is the label printed in the report lines. */ typedef struct test_case_data { uint32_t frame_size; uint32_t sample_size; 
bool reverse; t_jack_to_integer jack_to_integer_accel; t_jack_to_integer jack_to_integer_orig; t_integer_to_jack integer_to_jack_accel; t_integer_to_jack integer_to_jack_orig; dither_state_t *ditherstate; const char *name; } test_case_data_t; /* One entry per integer sample format, in struct field order: frame_size, sample_size, reverse, accelerated and original float-to-integer converter, accelerated and original integer-to-float converter, dither state (NULL in every entry) and the name used in the printed report. */ test_case_data_t test_cases[] = { { 4, 3, true, accelerated::sample_move_d32u24_sSs, origerated::sample_move_d32u24_sSs, accelerated::sample_move_dS_s32u24s, origerated::sample_move_dS_s32u24s, NULL, "32u24s" }, { 4, 3, false, accelerated::sample_move_d32u24_sS, origerated::sample_move_d32u24_sS, accelerated::sample_move_dS_s32u24, origerated::sample_move_dS_s32u24, NULL, "32u24" }, { 4, 3, true, accelerated::sample_move_d32l24_sSs, origerated::sample_move_d32l24_sSs, accelerated::sample_move_dS_s32l24s, origerated::sample_move_dS_s32l24s, NULL, "32l24s" }, { 4, 3, false, accelerated::sample_move_d32l24_sS, origerated::sample_move_d32l24_sS, accelerated::sample_move_dS_s32l24, origerated::sample_move_dS_s32l24, NULL, "32l24" }, { 3, 3, true, accelerated::sample_move_d24_sSs, origerated::sample_move_d24_sSs, accelerated::sample_move_dS_s24s, origerated::sample_move_dS_s24s, NULL, "24s" }, { 3, 3, false, accelerated::sample_move_d24_sS, origerated::sample_move_d24_sS, accelerated::sample_move_dS_s24, origerated::sample_move_dS_s24, NULL, "24" }, { 2, 2, true, accelerated::sample_move_d16_sSs, origerated::sample_move_d16_sSs, accelerated::sample_move_dS_s16s, origerated::sample_move_dS_s16s, NULL, "16s" }, { 2, 2, false, accelerated::sample_move_d16_sS, origerated::sample_move_d16_sS, accelerated::sample_move_dS_s16, origerated::sample_move_dS_s16, NULL, "16" }, }; // we need to repeat for better accuracy at time measurement const uint32_t retry_per_case = 1000; // setup test buffers #define TESTBUFF_SIZE 1024 jack_default_audio_sample_t jackbuffer_source[TESTBUFF_SIZE]; // integer buffers: max 4 bytes per value / * 2 for stereo char integerbuffer_accel[TESTBUFF_SIZE*4*2]; char integerbuffer_orig[TESTBUFF_SIZE*4*2]; // float 
buffers jack_default_audio_sample_t jackfloatbuffer_accel[TESTBUFF_SIZE]; jack_default_audio_sample_t jackfloatbuffer_orig[TESTBUFF_SIZE]; // comparing unsigned makes life easier /* extract_integer: presumably returns the sample stored at byte position offset of buff as an unsigned 32-bit number, so that accelerated and original output can be compared numerically. With big_endian set, reading starts at the last byte of the sample (offset + sample_size - 1). NOTE(review): how frame_size enters the other byte order should be confirmed against the complete function body. */ uint32_t extract_integer( char* buff, uint32_t offset, uint32_t frame_size, uint32_t sample_size, bool big_endian) { uint32_t retval = 0; unsigned char* curr; uint32_t mult = 1; if(big_endian) { curr = (unsigned char*)buff + offset + sample_size-1; for(uint32_t i=0; i integer // clean target buffers memset(integerbuffer_accel, 0, sizeof(integerbuffer_accel)); memset(integerbuffer_orig, 0, sizeof(integerbuffer_orig)); // accel clock_t time_to_integer_accel = clock(); for(uint32_t repetition=0; repetitionInteger @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", test_cases[testcase].name, channels, timediff_to_integer_orig, timediff_to_integer_accel, (timediff_to_integer_orig/timediff_to_integer_accel-1)*100.0); uint32_t int_deviation_max = 0; uint32_t int_error_count = 0; // output error (avoid spam -> limit error lines per test case) for(uint32_t sample=0; sampleintval_orig+1 || intval_orig>intval_accel+1) { if(int_error_count intval_orig) int_deviation = intval_accel-intval_orig; else int_deviation = intval_orig-intval_accel; if(int_deviation > int_deviation_max) int_deviation_max = int_deviation; } } printf( "JackFloat->Integer @%7.7s/%u: Errors: %u Max deviation %u\n", test_cases[testcase].name, channels, int_error_count, int_deviation_max); ////////////////////////////////////////////////////////////////////////////// // integer -> jackfloat // clean target buffers memset(jackfloatbuffer_accel, 0, sizeof(jackfloatbuffer_accel)); memset(jackfloatbuffer_orig, 0, sizeof(jackfloatbuffer_orig)); // accel clock_t time_to_float_accel = clock(); for(uint32_t repetition=0; repetitionJackFloat @%7.7s/%u: Orig %7.6f sec / Accel %7.6f sec -> Win: %5.2f %%\n", test_cases[testcase].name, channels, timediff_to_float_orig, timediff_to_float_accel, 
(timediff_to_float_orig/timediff_to_float_accel-1)*100.0); jack_default_audio_sample_t float_deviation_max = 0.0; uint32_t float_error_count = 0; // output error (avoid spam -> limit error lines per test case) for(uint32_t sample=0; sample floatval_orig) float_deviation = floatval_accel-floatval_orig; else float_deviation = floatval_orig-floatval_accel; if(float_deviation > float_deviation_max) float_deviation_max = float_deviation; // deviation > half bit => error if(float_deviation > 0.5) { if(float_error_countJackFloat @%7.7s/%u: Errors: %u Max deviation %f\n", test_cases[testcase].name, channels, float_error_count, float_deviation_max); printf("\n"); } } return 0; }