diff options
Diffstat (limited to 'src/gf_w16.c')
-rw-r--r-- | src/gf_w16.c | 231 |
1 files changed, 114 insertions, 117 deletions
diff --git a/src/gf_w16.c b/src/gf_w16.c index 4e026b2..8316892 100644 --- a/src/gf_w16.c +++ b/src/gf_w16.c @@ -12,6 +12,7 @@ #include <stdio.h> #include <stdlib.h> #include "gf_w16.h" +#include "gf_cpu.h" #define AB2(ip, am1 ,am2, b, t1, t2) {\ t1 = (b << 1) & am1;\ @@ -391,6 +392,7 @@ gf_val_32_t gf_w16_matrix (gf_t *gf, gf_val_32_t b) extra memory. */ +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -398,8 +400,6 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -433,11 +433,11 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -445,8 +445,6 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -473,11 +471,11 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif +#if defined(INTEL_SSE4_PCLMUL) static inline gf_val_32_t @@ -485,8 +483,6 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) { gf_val_32_t rv = 0; -#if defined(INTEL_SSE4_PCLMUL) - __m128i a, b; __m128i result; __m128i prim_poly; @@ -515,10 +511,9 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) rv = ((gf_val_32_t)_mm_extract_epi32(result, 0)); - -#endif return rv; } +#endif static @@ -548,7 +543,7 @@ gf_w16_shift_multiply (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16) static int gf_w16_shift_init(gf_t *gf) { - gf->multiply.w32 = gf_w16_shift_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_shift_multiply) return 1; } @@ -556,25 +551,27 @@ static int gf_w16_cfm_init(gf_t *gf) { #if defined(INTEL_SSE4_PCLMUL) - gf_internal_t *h; + if (gf_cpu_supports_intel_pclmul) { + gf_internal_t *h; - h = (gf_internal_t *) gf->scratch; - - /*Ben: Determining how many reductions to do */ - - if ((0xfe00 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_2; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2; - } else if((0xf000 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_3; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_3; - } else if ((0xe000 & h->prim_poly) == 0) { - gf->multiply.w32 = gf_w16_clm_multiply_4; - gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_4; - } else { - return 0; - } - return 1; + h = (gf_internal_t *) gf->scratch; + + /*Ben: Determining how many reductions to do */ + + if ((0xfe00 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_2) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_2) + } else if((0xf000 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_3) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_3) + } else if ((0xe000 & h->prim_poly) == 0) { + SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_4) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_4) + } else { + return 0; + } + return 1; + } #endif return 0; @@ -688,10 +685,9 @@ int gf_w16_log_init(gf_t *gf) if (check) { if (h->mult_type != GF_MULT_LOG_TABLE) { - -#if defined(INTEL_SSE4_PCLMUL) - return gf_w16_cfm_init(gf); -#endif + if (gf_cpu_supports_intel_pclmul) { + return gf_w16_cfm_init(gf); + } return gf_w16_shift_init(gf); } else { _gf_errno = GF_E_LOGPOLY; @@ -705,10 +701,10 @@ int gf_w16_log_init(gf_t *gf) ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; } - gf->inverse.w32 = gf_w16_log_inverse; - gf->divide.w32 = gf_w16_log_divide; - gf->multiply.w32 = gf_w16_log_multiply; - gf->multiply_region.w32 = gf_w16_log_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w16_log_inverse) + SET_FUNCTION(gf,divide,w32,gf_w16_log_divide) + SET_FUNCTION(gf,multiply,w32,gf_w16_log_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_multiply_region) return 1; } @@ -948,11 +944,11 @@ gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v gf_do_final_region_alignment(&rd); } +#ifdef INTEL_SSSE3 static void gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 uint64_t i, j, *s64, *d64, *top64;; uint64_t c, prod; uint8_t low[4][16]; @@ -1078,14 +1074,14 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v } gf_do_final_region_alignment(&rd); -#endif } +#endif +#ifdef INTEL_SSSE3 static void gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor) { -#ifdef INTEL_SSSE3 uint64_t i, j, *s64, *d64, *top64;; uint64_t c, prod; uint8_t low[4][16]; @@ -1187,8 +1183,8 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des } gf_do_final_region_alignment(&rd); -#endif } +#endif uint32_t gf_w16_split_8_8_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b) @@ -1216,21 +1212,11 @@ int gf_w16_split_init(gf_t *gf) { gf_internal_t *h; struct gf_w16_split_8_8_data *d8; - int i, j, exp, issse3; - int isneon = 0; + int i, j, exp; uint32_t p, basep, tmp; h = (gf_internal_t *) gf->scratch; -#ifdef INTEL_SSSE3 - issse3 = 1; -#else - issse3 = 0; -#endif -#ifdef ARM_NEON - isneon = 1; -#endif - if (h->arg1 == 8 && h->arg2 == 8) { d8 = (struct gf_w16_split_8_8_data *) h->private; basep = 1; @@ -1260,8 +1246,8 @@ int gf_w16_split_init(gf_t *gf) } for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep); } - gf->multiply.w32 = gf_w16_split_8_8_multiply; - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply,w32,gf_w16_split_8_8_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) return 1; } @@ -1273,36 +1259,45 @@ int gf_w16_split_init(gf_t *gf) /* Defaults */ - if (issse3) { - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_multiply_region; - } else if (isneon) { -#ifdef ARM_NEON +#ifdef INTEL_SSSE3 + if (gf_cpu_supports_intel_ssse3) { + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_multiply_region) + } else { +#elif ARM_NEON + if (gf_cpu_supports_arm_neon) { gf_w16_neon_split_init(gf); -#endif } else { - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; +#endif + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) +#if defined(INTEL_SSSE3) || defined(ARM_NEON) } - +#endif if ((h->arg1 == 8 && h->arg2 == 16) || (h->arg2 == 8 && h->arg1 == 16)) { - gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region) } else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) { - if (issse3 || isneon) { +#if defined(INTEL_SSSE3) || defined(ARM_NEON) + if (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) { if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else if(h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; - else if(h->region_type & GF_REGION_ALTMAP && issse3) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) +#if defined(INTEL_SSSE3) + else if(h->region_type & GF_REGION_ALTMAP && gf_cpu_supports_intel_ssse3) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_altmap_multiply_region) +#endif } else { +#endif if(h->region_type & GF_REGION_SIMD) return 0; else if(h->region_type & GF_REGION_ALTMAP) - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region) else - gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region) +#if defined(INTEL_SSSE3) || defined(ARM_NEON) } +#endif } return 1; @@ -1313,7 +1308,7 @@ int gf_w16_table_init(gf_t *gf) { gf_w16_log_init(gf); - gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_table_lazy_multiply_region) return 1; } @@ -1844,28 +1839,30 @@ int gf_w16_bytwo_init(gf_t *gf) } if (h->mult_type == GF_MULT_BYTWO_p) { - gf->multiply.w32 = gf_w16_bytwo_p_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_p_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; - else - gf->multiply_region.w32 = gf_w16_bytwo_p_sse_multiply_region; - #else - gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region; + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_sse_multiply_region) + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } else { - gf->multiply.w32 = gf_w16_bytwo_b_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_b_multiply) #ifdef INTEL_SSE2 - if (h->region_type & GF_REGION_NOSIMD) - gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; - else - gf->multiply_region.w32 = gf_w16_bytwo_b_sse_multiply_region; - #else - gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region; + if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) { + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_sse_multiply_region) + } else { + #endif + SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region) if(h->region_type & GF_REGION_SIMD) return 0; + #ifdef INTEL_SSE2 + } #endif } @@ -1904,10 +1901,10 @@ int gf_w16_log_zero_init(gf_t *gf) ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]]; } - gf->inverse.w32 = gf_w16_log_zero_inverse; - gf->divide.w32 = gf_w16_log_zero_divide; - gf->multiply.w32 = gf_w16_log_zero_multiply; - gf->multiply_region.w32 = gf_w16_log_zero_multiply_region; + SET_FUNCTION(gf,inverse,w32,gf_w16_log_zero_inverse) + SET_FUNCTION(gf,divide,w32,gf_w16_log_zero_divide) + SET_FUNCTION(gf,multiply,w32,gf_w16_log_zero_multiply) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_zero_multiply_region) return 1; } @@ -2145,18 +2142,18 @@ int gf_w16_composite_init(gf_t *gf) cd->mult_table = gf_w8_get_mult_table(h->base_gf); if (h->region_type & GF_REGION_ALTMAP) { - gf->multiply_region.w32 = gf_w16_composite_multiply_region_alt; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region_alt) } else { - gf->multiply_region.w32 = gf_w16_composite_multiply_region; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region) } if (cd->mult_table == NULL) { - gf->multiply.w32 = gf_w16_composite_multiply_recursive; + SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_recursive) } else { - gf->multiply.w32 = gf_w16_composite_multiply_inline; + SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_inline) } - gf->divide.w32 = NULL; - gf->inverse.w32 = gf_w16_composite_inverse; + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,gf_w16_composite_inverse) return 1; } @@ -2277,10 +2274,10 @@ int gf_w16_group_init(gf_t *gf) d44->reduce[p>>16] = (p&0xffff); } - gf->multiply.w32 = gf_w16_group_4_4_multiply; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = gf_w16_group_4_4_region_multiply; + SET_FUNCTION(gf,multiply,w32,gf_w16_group_4_4_multiply) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,gf_w16_group_4_4_region_multiply) return 1; } @@ -2360,10 +2357,10 @@ int gf_w16_init(gf_t *gf) if (h->mult_type != GF_MULT_COMPOSITE) h->prim_poly |= (1 << 16); - gf->multiply.w32 = NULL; - gf->divide.w32 = NULL; - gf->inverse.w32 = NULL; - gf->multiply_region.w32 = NULL; + SET_FUNCTION(gf,multiply,w32,NULL) + SET_FUNCTION(gf,divide,w32,NULL) + SET_FUNCTION(gf,inverse,w32,NULL) + SET_FUNCTION(gf,multiply_region,w32,NULL) switch(h->mult_type) { case GF_MULT_LOG_ZERO: if (gf_w16_log_zero_init(gf) == 0) return 0; break; @@ -2380,34 +2377,34 @@ int gf_w16_init(gf_t *gf) default: return 0; } if (h->divide_type == GF_DIVIDE_EUCLID) { - gf->divide.w32 = gf_w16_divide_from_inverse; - gf->inverse.w32 = gf_w16_euclid; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w16_euclid) } else if (h->divide_type == GF_DIVIDE_MATRIX) { - gf->divide.w32 = gf_w16_divide_from_inverse; - gf->inverse.w32 = gf_w16_matrix; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + SET_FUNCTION(gf,inverse,w32,gf_w16_matrix) } if (gf->divide.w32 == NULL) { - gf->divide.w32 = gf_w16_divide_from_inverse; - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_euclid; + SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse) + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_euclid) } - if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_inverse_from_divide; + if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_inverse_from_divide) if (h->region_type & GF_REGION_ALTMAP) { if (h->mult_type == GF_MULT_COMPOSITE) { - gf->extract_word.w32 = gf_w16_composite_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_composite_extract_word) } else { - gf->extract_word.w32 = gf_w16_split_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_split_extract_word) } } else if (h->region_type == GF_REGION_CAUCHY) { - gf->multiply_region.w32 = gf_wgen_cauchy_region; - gf->extract_word.w32 = gf_wgen_extract_word; + SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region) + SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word) } else { - gf->extract_word.w32 = gf_w16_extract_word; + SET_FUNCTION(gf,extract_word,w32,gf_w16_extract_word) } if (gf->multiply_region.w32 == NULL) { - gf->multiply_region.w32 = gf_w16_multiply_region_from_single; + SET_FUNCTION(gf,multiply_region,w32,gf_w16_multiply_region_from_single) } return 1; } |