summaryrefslogtreecommitdiff
path: root/src/neon/gf_w4_neon.c
diff options
context:
space:
mode:
author bassamtabbara <bassam.tabbara@quantum.com> 2016-09-14 20:22:27 +0000
committer bassamtabbara <bassam.tabbara@quantum.com> 2016-09-14 20:22:27 +0000
commit a6847973cba329ae079d3bd26341a4ec2906f012 (patch)
tree cbdb3947d9d86f2fa7d9cee84d3b773e1bb8f2b2 /src/neon/gf_w4_neon.c
parent 185295f247698f727fd3bb11c4795e1741bb359e (diff)
parent 0690ba86a81faff99a3383b5907ddc02a317eea0 (diff)
download gf-complete-a6847973cba329ae079d3bd26341a4ec2906f012.tar.gz
Merge branch 'simd-runtime-detection' into 'master'
Support for runtime detection of SIMD. This merge request adds support for runtime SIMD detection. The idea is that you would build gf-complete with full SIMD support, and gf_init will select the appropriate function at runtime based on the capabilities of the target machine. This would eliminate the need to build different versions of the code for different processors (you still need to build for different archs). Ceph, for example, has 3-4 flavors of jerasure on Intel (and does not support PCLMUL optimizations as a result of using too many binaries). Numerous libraries have followed a similar approach, including zlib. When reviewing this merge request I recommend that you look at each of the 5 commits independently. The first 3 commits don't change the existing logic. Instead they add debugging functions and test scripts that facilitate testing of the 4th commit. The 4th commit is where all the new logic goes, along with tests. The 5th commit fixes build scripts. I've tested this on x86_64, arm, and aarch64 using QEMU. Numerous tests have been added that help this code and could help with future testing of gf-complete. Also, I've compared the functions selected by the old code (prior to runtime SIMD support) with those selected by the new code, and all functions are identical. Here's a gist with the test results prior to SIMD extensions: https://gist.github.com/bassamtabbara/d9a6dcf0a749b7ab01bc2953a359edec. See merge request !18
Diffstat (limited to 'src/neon/gf_w4_neon.c')
-rw-r--r-- src/neon/gf_w4_neon.c 6
1 files changed, 3 insertions, 3 deletions
diff --git a/src/neon/gf_w4_neon.c b/src/neon/gf_w4_neon.c
index 3a21432..5f35c86 100644
--- a/src/neon/gf_w4_neon.c
+++ b/src/neon/gf_w4_neon.c
@@ -235,13 +235,13 @@ gf_w4_single_table_multiply_region_neon(gf_t *gf, void *src, void *dest,
int gf_w4_neon_cfm_init(gf_t *gf)
{
// single clm multiplication probably pointless
- gf->multiply.w32 = gf_w4_neon_clm_multiply;
- gf->multiply_region.w32 = gf_w4_neon_clm_multiply_region_from_single;
+ SET_FUNCTION(gf,multiply,w32,gf_w4_neon_clm_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w4_neon_clm_multiply_region_from_single)
return 1;
}
void gf_w4_neon_single_table_init(gf_t *gf)
{
- gf->multiply_region.w32 = gf_w4_single_table_multiply_region_neon;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w4_single_table_multiply_region_neon)
}