summary | refs | log | tree | commit | diff
path: root/src/gf_w16.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gf_w16.c')
-rw-r--r-- src/gf_w16.c 231
1 file changed, 114 insertions, 117 deletions
diff --git a/src/gf_w16.c b/src/gf_w16.c
index 4e026b2..8316892 100644
--- a/src/gf_w16.c
+++ b/src/gf_w16.c
@@ -12,6 +12,7 @@
#include <stdio.h>
#include <stdlib.h>
#include "gf_w16.h"
+#include "gf_cpu.h"
#define AB2(ip, am1 ,am2, b, t1, t2) {\
t1 = (b << 1) & am1;\
@@ -391,6 +392,7 @@ gf_val_32_t gf_w16_matrix (gf_t *gf, gf_val_32_t b)
extra memory.
*/
+#if defined(INTEL_SSE4_PCLMUL)
static
inline
gf_val_32_t
@@ -398,8 +400,6 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -433,11 +433,11 @@ gf_w16_clm_multiply_2 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-
-#endif
return rv;
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
inline
gf_val_32_t
@@ -445,8 +445,6 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -473,11 +471,11 @@ gf_w16_clm_multiply_3 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-
-#endif
return rv;
}
+#endif
+#if defined(INTEL_SSE4_PCLMUL)
static
inline
gf_val_32_t
@@ -485,8 +483,6 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
{
gf_val_32_t rv = 0;
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a, b;
__m128i result;
__m128i prim_poly;
@@ -515,10 +511,9 @@ gf_w16_clm_multiply_4 (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
rv = ((gf_val_32_t)_mm_extract_epi32(result, 0));
-
-#endif
return rv;
}
+#endif
static
@@ -548,7 +543,7 @@ gf_w16_shift_multiply (gf_t *gf, gf_val_32_t a16, gf_val_32_t b16)
static
int gf_w16_shift_init(gf_t *gf)
{
- gf->multiply.w32 = gf_w16_shift_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_shift_multiply)
return 1;
}
@@ -556,25 +551,27 @@ static
int gf_w16_cfm_init(gf_t *gf)
{
#if defined(INTEL_SSE4_PCLMUL)
- gf_internal_t *h;
+ if (gf_cpu_supports_intel_pclmul) {
+ gf_internal_t *h;
- h = (gf_internal_t *) gf->scratch;
-
- /*Ben: Determining how many reductions to do */
-
- if ((0xfe00 & h->prim_poly) == 0) {
- gf->multiply.w32 = gf_w16_clm_multiply_2;
- gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_2;
- } else if((0xf000 & h->prim_poly) == 0) {
- gf->multiply.w32 = gf_w16_clm_multiply_3;
- gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_3;
- } else if ((0xe000 & h->prim_poly) == 0) {
- gf->multiply.w32 = gf_w16_clm_multiply_4;
- gf->multiply_region.w32 = gf_w16_clm_multiply_region_from_single_4;
- } else {
- return 0;
- }
- return 1;
+ h = (gf_internal_t *) gf->scratch;
+
+ /*Ben: Determining how many reductions to do */
+
+ if ((0xfe00 & h->prim_poly) == 0) {
+ SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_2)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_2)
+ } else if((0xf000 & h->prim_poly) == 0) {
+ SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_3)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_3)
+ } else if ((0xe000 & h->prim_poly) == 0) {
+ SET_FUNCTION(gf,multiply,w32,gf_w16_clm_multiply_4)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_clm_multiply_region_from_single_4)
+ } else {
+ return 0;
+ }
+ return 1;
+ }
#endif
return 0;
@@ -688,10 +685,9 @@ int gf_w16_log_init(gf_t *gf)
if (check) {
if (h->mult_type != GF_MULT_LOG_TABLE) {
-
-#if defined(INTEL_SSE4_PCLMUL)
- return gf_w16_cfm_init(gf);
-#endif
+ if (gf_cpu_supports_intel_pclmul) {
+ return gf_w16_cfm_init(gf);
+ }
return gf_w16_shift_init(gf);
} else {
_gf_errno = GF_E_LOGPOLY;
@@ -705,10 +701,10 @@ int gf_w16_log_init(gf_t *gf)
ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]];
}
- gf->inverse.w32 = gf_w16_log_inverse;
- gf->divide.w32 = gf_w16_log_divide;
- gf->multiply.w32 = gf_w16_log_multiply;
- gf->multiply_region.w32 = gf_w16_log_multiply_region;
+ SET_FUNCTION(gf,inverse,w32,gf_w16_log_inverse)
+ SET_FUNCTION(gf,divide,w32,gf_w16_log_divide)
+ SET_FUNCTION(gf,multiply,w32,gf_w16_log_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_multiply_region)
return 1;
}
@@ -948,11 +944,11 @@ gf_w16_table_lazy_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t v
gf_do_final_region_alignment(&rd);
}
+#ifdef INTEL_SSSE3
static
void
gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
uint64_t i, j, *s64, *d64, *top64;;
uint64_t c, prod;
uint8_t low[4][16];
@@ -1078,14 +1074,14 @@ gf_w16_split_4_16_lazy_sse_multiply_region(gf_t *gf, void *src, void *dest, gf_v
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
+#ifdef INTEL_SSSE3
static
void
gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *dest, gf_val_32_t val, int bytes, int xor)
{
-#ifdef INTEL_SSSE3
uint64_t i, j, *s64, *d64, *top64;;
uint64_t c, prod;
uint8_t low[4][16];
@@ -1187,8 +1183,8 @@ gf_w16_split_4_16_lazy_sse_altmap_multiply_region(gf_t *gf, void *src, void *des
}
gf_do_final_region_alignment(&rd);
-#endif
}
+#endif
uint32_t
gf_w16_split_8_8_multiply(gf_t *gf, gf_val_32_t a, gf_val_32_t b)
@@ -1216,21 +1212,11 @@ int gf_w16_split_init(gf_t *gf)
{
gf_internal_t *h;
struct gf_w16_split_8_8_data *d8;
- int i, j, exp, issse3;
- int isneon = 0;
+ int i, j, exp;
uint32_t p, basep, tmp;
h = (gf_internal_t *) gf->scratch;
-#ifdef INTEL_SSSE3
- issse3 = 1;
-#else
- issse3 = 0;
-#endif
-#ifdef ARM_NEON
- isneon = 1;
-#endif
-
if (h->arg1 == 8 && h->arg2 == 8) {
d8 = (struct gf_w16_split_8_8_data *) h->private;
basep = 1;
@@ -1260,8 +1246,8 @@ int gf_w16_split_init(gf_t *gf)
}
for (i = 0; i < 8; i++) basep = GF_MULTBY_TWO(basep);
}
- gf->multiply.w32 = gf_w16_split_8_8_multiply;
- gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_split_8_8_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region)
return 1;
}
@@ -1273,36 +1259,45 @@ int gf_w16_split_init(gf_t *gf)
/* Defaults */
- if (issse3) {
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_multiply_region;
- } else if (isneon) {
-#ifdef ARM_NEON
+#ifdef INTEL_SSSE3
+ if (gf_cpu_supports_intel_ssse3) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_multiply_region)
+ } else {
+#elif ARM_NEON
+ if (gf_cpu_supports_arm_neon) {
gf_w16_neon_split_init(gf);
-#endif
} else {
- gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region;
+#endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region)
+#if defined(INTEL_SSSE3) || defined(ARM_NEON)
}
-
+#endif
if ((h->arg1 == 8 && h->arg2 == 16) || (h->arg2 == 8 && h->arg1 == 16)) {
- gf->multiply_region.w32 = gf_w16_split_8_16_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_8_16_lazy_multiply_region)
} else if ((h->arg1 == 4 && h->arg2 == 16) || (h->arg2 == 4 && h->arg1 == 16)) {
- if (issse3 || isneon) {
+#if defined(INTEL_SSSE3) || defined(ARM_NEON)
+ if (gf_cpu_supports_intel_ssse3 || gf_cpu_supports_arm_neon) {
if(h->region_type & GF_REGION_ALTMAP && h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region)
else if(h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region;
- else if(h->region_type & GF_REGION_ALTMAP && issse3)
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_sse_altmap_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region)
+#if defined(INTEL_SSSE3)
+ else if(h->region_type & GF_REGION_ALTMAP && gf_cpu_supports_intel_ssse3)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_sse_altmap_multiply_region)
+#endif
} else {
+#endif
if(h->region_type & GF_REGION_SIMD)
return 0;
else if(h->region_type & GF_REGION_ALTMAP)
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_nosse_altmap_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_nosse_altmap_multiply_region)
else
- gf->multiply_region.w32 = gf_w16_split_4_16_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_split_4_16_lazy_multiply_region)
+#if defined(INTEL_SSSE3) || defined(ARM_NEON)
}
+#endif
}
return 1;
@@ -1313,7 +1308,7 @@ int gf_w16_table_init(gf_t *gf)
{
gf_w16_log_init(gf);
- gf->multiply_region.w32 = gf_w16_table_lazy_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_table_lazy_multiply_region)
return 1;
}
@@ -1844,28 +1839,30 @@ int gf_w16_bytwo_init(gf_t *gf)
}
if (h->mult_type == GF_MULT_BYTWO_p) {
- gf->multiply.w32 = gf_w16_bytwo_p_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_p_multiply)
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region;
- else
- gf->multiply_region.w32 = gf_w16_bytwo_p_sse_multiply_region;
- #else
- gf->multiply_region.w32 = gf_w16_bytwo_p_nosse_multiply_region;
+ if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_sse_multiply_region)
+ } else {
+ #endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_p_nosse_multiply_region)
if(h->region_type & GF_REGION_SIMD)
return 0;
+ #ifdef INTEL_SSE2
+ }
#endif
} else {
- gf->multiply.w32 = gf_w16_bytwo_b_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_bytwo_b_multiply)
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSIMD)
- gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region;
- else
- gf->multiply_region.w32 = gf_w16_bytwo_b_sse_multiply_region;
- #else
- gf->multiply_region.w32 = gf_w16_bytwo_b_nosse_multiply_region;
+ if (gf_cpu_supports_intel_sse2 && !(h->region_type & GF_REGION_NOSIMD)) {
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_sse_multiply_region)
+ } else {
+ #endif
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_bytwo_b_nosse_multiply_region)
if(h->region_type & GF_REGION_SIMD)
return 0;
+ #ifdef INTEL_SSE2
+ }
#endif
}
@@ -1904,10 +1901,10 @@ int gf_w16_log_zero_init(gf_t *gf)
ltd->inv_tbl[i] = ltd->antilog_tbl[GF_MULT_GROUP_SIZE-ltd->log_tbl[i]];
}
- gf->inverse.w32 = gf_w16_log_zero_inverse;
- gf->divide.w32 = gf_w16_log_zero_divide;
- gf->multiply.w32 = gf_w16_log_zero_multiply;
- gf->multiply_region.w32 = gf_w16_log_zero_multiply_region;
+ SET_FUNCTION(gf,inverse,w32,gf_w16_log_zero_inverse)
+ SET_FUNCTION(gf,divide,w32,gf_w16_log_zero_divide)
+ SET_FUNCTION(gf,multiply,w32,gf_w16_log_zero_multiply)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_log_zero_multiply_region)
return 1;
}
@@ -2145,18 +2142,18 @@ int gf_w16_composite_init(gf_t *gf)
cd->mult_table = gf_w8_get_mult_table(h->base_gf);
if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w32 = gf_w16_composite_multiply_region_alt;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region_alt)
} else {
- gf->multiply_region.w32 = gf_w16_composite_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_composite_multiply_region)
}
if (cd->mult_table == NULL) {
- gf->multiply.w32 = gf_w16_composite_multiply_recursive;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_recursive)
} else {
- gf->multiply.w32 = gf_w16_composite_multiply_inline;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_composite_multiply_inline)
}
- gf->divide.w32 = NULL;
- gf->inverse.w32 = gf_w16_composite_inverse;
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,gf_w16_composite_inverse)
return 1;
}
@@ -2277,10 +2274,10 @@ int gf_w16_group_init(gf_t *gf)
d44->reduce[p>>16] = (p&0xffff);
}
- gf->multiply.w32 = gf_w16_group_4_4_multiply;
- gf->divide.w32 = NULL;
- gf->inverse.w32 = NULL;
- gf->multiply_region.w32 = gf_w16_group_4_4_region_multiply;
+ SET_FUNCTION(gf,multiply,w32,gf_w16_group_4_4_multiply)
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,NULL)
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_group_4_4_region_multiply)
return 1;
}
@@ -2360,10 +2357,10 @@ int gf_w16_init(gf_t *gf)
if (h->mult_type != GF_MULT_COMPOSITE) h->prim_poly |= (1 << 16);
- gf->multiply.w32 = NULL;
- gf->divide.w32 = NULL;
- gf->inverse.w32 = NULL;
- gf->multiply_region.w32 = NULL;
+ SET_FUNCTION(gf,multiply,w32,NULL)
+ SET_FUNCTION(gf,divide,w32,NULL)
+ SET_FUNCTION(gf,inverse,w32,NULL)
+ SET_FUNCTION(gf,multiply_region,w32,NULL)
switch(h->mult_type) {
case GF_MULT_LOG_ZERO: if (gf_w16_log_zero_init(gf) == 0) return 0; break;
@@ -2380,34 +2377,34 @@ int gf_w16_init(gf_t *gf)
default: return 0;
}
if (h->divide_type == GF_DIVIDE_EUCLID) {
- gf->divide.w32 = gf_w16_divide_from_inverse;
- gf->inverse.w32 = gf_w16_euclid;
+ SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse)
+ SET_FUNCTION(gf,inverse,w32,gf_w16_euclid)
} else if (h->divide_type == GF_DIVIDE_MATRIX) {
- gf->divide.w32 = gf_w16_divide_from_inverse;
- gf->inverse.w32 = gf_w16_matrix;
+ SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse)
+ SET_FUNCTION(gf,inverse,w32,gf_w16_matrix)
}
if (gf->divide.w32 == NULL) {
- gf->divide.w32 = gf_w16_divide_from_inverse;
- if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_euclid;
+ SET_FUNCTION(gf,divide,w32,gf_w16_divide_from_inverse)
+ if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_euclid)
}
- if (gf->inverse.w32 == NULL) gf->inverse.w32 = gf_w16_inverse_from_divide;
+ if (gf->inverse.w32 == NULL) SET_FUNCTION(gf,inverse,w32,gf_w16_inverse_from_divide)
if (h->region_type & GF_REGION_ALTMAP) {
if (h->mult_type == GF_MULT_COMPOSITE) {
- gf->extract_word.w32 = gf_w16_composite_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w16_composite_extract_word)
} else {
- gf->extract_word.w32 = gf_w16_split_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w16_split_extract_word)
}
} else if (h->region_type == GF_REGION_CAUCHY) {
- gf->multiply_region.w32 = gf_wgen_cauchy_region;
- gf->extract_word.w32 = gf_wgen_extract_word;
+ SET_FUNCTION(gf,multiply_region,w32,gf_wgen_cauchy_region)
+ SET_FUNCTION(gf,extract_word,w32,gf_wgen_extract_word)
} else {
- gf->extract_word.w32 = gf_w16_extract_word;
+ SET_FUNCTION(gf,extract_word,w32,gf_w16_extract_word)
}
if (gf->multiply_region.w32 == NULL) {
- gf->multiply_region.w32 = gf_w16_multiply_region_from_single;
+ SET_FUNCTION(gf,multiply_region,w32,gf_w16_multiply_region_from_single)
}
return 1;
}