summary | refs | log | tree | commit | diff
path: root/src/gf_w128.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gf_w128.c')
-rw-r--r-- src/gf_w128.c 114
1 files changed, 56 insertions, 58 deletions
diff --git a/src/gf_w128.c b/src/gf_w128.c
index b1e3d92..74f72e8 100644
--- a/src/gf_w128.c
+++ b/src/gf_w128.c
@@ -11,6 +11,7 @@
#include "gf_int.h"
#include <stdio.h>
#include <stdlib.h>
+#include "gf_cpu.h"
#define GF_FIELD_WIDTH (128)
@@ -290,11 +291,11 @@ gf_w128_shift_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_12
return;
}
+#if defined(INTEL_SSE4_PCLMUL)
+
void
gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4_PCLMUL)
-
__m128i a,b;
__m128i result0,result1;
__m128i prim_poly;
@@ -338,9 +339,8 @@ gf_w128_clm_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_
c128[0] = (uint64_t)_mm_extract_epi64(result1,1);
c128[1] = (uint64_t)_mm_extract_epi64(result1,0);
-#endif
-return;
}
+#endif
void
gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
@@ -376,10 +376,10 @@ gf_w128_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_
return;
}
+#if defined(INTEL_SSE4)
void
gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4)
int i;
__m128i a, b, pp, prod, amask, u_middle_one;
/*John: pmask is always the highest bit set, and the rest zeros. amask changes, it's a countdown.*/
@@ -427,16 +427,16 @@ gf_w128_sse_bytwo_p_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
}
c128[0] = (uint64_t)_mm_extract_epi64(prod, 1);
c128[1] = (uint64_t)_mm_extract_epi64(prod, 0);
-#endif
return;
}
+#endif
/* Ben: This slow function implements sse instructions for bytwo_b because why not */
+#if defined(INTEL_SSE4)
void
gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
-#if defined(INTEL_SSE4)
__m128i a, b, lmask, hmask, pp, c, middle_one;
gf_internal_t *h;
uint64_t topbit, middlebit;
@@ -471,8 +471,8 @@ gf_w128_sse_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_
if (middlebit) b = _mm_xor_si128(b, middle_one);
if (topbit) b = _mm_xor_si128(b, pp);
}
-#endif
}
+#endif
void
gf_w128_bytwo_b_multiply(gf_t *gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
@@ -1146,7 +1146,7 @@ gf_w128_group_multiply_region(gf_t *gf, void *src, void *dest, gf_val_128_t val,
}
/* a^-1 -> b */
- void
+void
gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
{
uint64_t e_i[2], e_im1[2], e_ip1[2];
@@ -1239,7 +1239,7 @@ gf_w128_euclid(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
return;
}
- void
+void
gf_w128_divide_from_inverse(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val_128_t c128)
{
uint64_t d[2];
@@ -1248,7 +1248,7 @@ gf_w128_divide_from_inverse(GFP gf, gf_val_128_t a128, gf_val_128_t b128, gf_val
return;
}
- void
+void
gf_w128_inverse_from_divide(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
{
uint64_t one128[2];
@@ -1260,7 +1260,7 @@ gf_w128_inverse_from_divide(GFP gf, gf_val_128_t a128, gf_val_128_t b128)
static
- void
+void
gf_w128_composite_inverse(gf_t *gf, gf_val_128_t a, gf_val_128_t inv)
{
gf_internal_t *h = (gf_internal_t *) gf->scratch;
@@ -1405,14 +1405,14 @@ int gf_w128_composite_init(gf_t *gf)
gf_internal_t *h = (gf_internal_t *) gf->scratch;
if (h->region_type & GF_REGION_ALTMAP) {
- gf->multiply_region.w128 = gf_w128_composite_multiply_region_alt;
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_composite_multiply_region_alt)
} else {
- gf->multiply_region.w128 = gf_w128_composite_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_composite_multiply_region)
}
- gf->multiply.w128 = gf_w128_composite_multiply;
- gf->divide.w128 = gf_w128_divide_from_inverse;
- gf->inverse.w128 = gf_w128_composite_inverse;
+ SET_FUNCTION(gf,multiply,w128,gf_w128_composite_multiply)
+ SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse)
+ SET_FUNCTION(gf,inverse,w128,gf_w128_composite_inverse)
return 1;
}
@@ -1421,10 +1421,12 @@ static
int gf_w128_cfm_init(gf_t *gf)
{
#if defined(INTEL_SSE4_PCLMUL)
- gf->inverse.w128 = gf_w128_euclid;
- gf->multiply.w128 = gf_w128_clm_multiply;
- gf->multiply_region.w128 = gf_w128_clm_multiply_region_from_single;
- return 1;
+ if (gf_cpu_supports_intel_pclmul) {
+ SET_FUNCTION(gf,inverse,w128,gf_w128_euclid)
+ SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply)
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_clm_multiply_region_from_single)
+ return 1;
+ }
#endif
return 0;
@@ -1433,9 +1435,9 @@ int gf_w128_cfm_init(gf_t *gf)
static
int gf_w128_shift_init(gf_t *gf)
{
- gf->multiply.w128 = gf_w128_shift_multiply;
- gf->inverse.w128 = gf_w128_euclid;
- gf->multiply_region.w128 = gf_w128_multiply_region_from_single;
+ SET_FUNCTION(gf,multiply,w128,gf_w128_shift_multiply)
+ SET_FUNCTION(gf,inverse,w128,gf_w128_euclid)
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_multiply_region_from_single)
return 1;
}
@@ -1446,16 +1448,16 @@ int gf_w128_bytwo_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
if (h->mult_type == GF_MULT_BYTWO_p) {
- gf->multiply.w128 = gf_w128_bytwo_p_multiply;
- /*gf->multiply.w128 = gf_w128_sse_bytwo_p_multiply;*/
+ SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_p_multiply)
+ /*SET_FUNCTION(gf,multiply,w128,gf_w128_sse_bytwo_p_multiply)*/
/* John: the sse function is slower.*/
} else {
- gf->multiply.w128 = gf_w128_bytwo_b_multiply;
- /*gf->multiply.w128 = gf_w128_sse_bytwo_b_multiply;
+ SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_b_multiply)
+ /*SET_FUNCTION(gf,multiply,w128,gf_w128_sse_bytwo_b_multiply)
Ben: This sse function is also slower. */
}
- gf->inverse.w128 = gf_w128_euclid;
- gf->multiply_region.w128 = gf_w128_bytwo_b_multiply_region;
+ SET_FUNCTION(gf,inverse,w128,gf_w128_euclid)
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_bytwo_b_multiply_region)
return 1;
}
@@ -1525,20 +1527,20 @@ int gf_w128_split_init(gf_t *gf)
h = (gf_internal_t *) gf->scratch;
- gf->multiply.w128 = gf_w128_bytwo_p_multiply;
+ SET_FUNCTION(gf,multiply,w128,gf_w128_bytwo_p_multiply)
#if defined(INTEL_SSE4_PCLMUL)
- if (!(h->region_type & GF_REGION_NOSIMD)){
- gf->multiply.w128 = gf_w128_clm_multiply;
+ if (gf_cpu_supports_intel_pclmul && !(h->region_type & GF_REGION_NOSIMD)){
+ SET_FUNCTION(gf,multiply,w128,gf_w128_clm_multiply)
}
#endif
- gf->inverse.w128 = gf_w128_euclid;
+ SET_FUNCTION(gf,inverse,w128,gf_w128_euclid)
if ((h->arg1 != 4 && h->arg2 != 4) || h->mult_type == GF_MULT_DEFAULT) {
sd8 = (struct gf_w128_split_8_128_data *) h->private;
sd8->last_value[0] = 0;
sd8->last_value[1] = 0;
- gf->multiply_region.w128 = gf_w128_split_8_128_multiply_region;
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_8_128_multiply_region)
} else {
sd4 = (struct gf_w128_split_4_128_data *) h->private;
sd4->last_value[0] = 0;
@@ -1546,23 +1548,19 @@ int gf_w128_split_init(gf_t *gf)
if((h->region_type & GF_REGION_ALTMAP))
{
#ifdef INTEL_SSE4
- if(!(h->region_type & GF_REGION_NOSIMD))
- gf->multiply_region.w128 = gf_w128_split_4_128_sse_altmap_multiply_region;
+ if(gf_cpu_supports_intel_sse4 && !(h->region_type & GF_REGION_NOSIMD))
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_altmap_multiply_region)
else
- return 0;
- #else
- return 0;
#endif
+ return 0;
}
else {
#ifdef INTEL_SSE4
- if(!(h->region_type & GF_REGION_NOSIMD))
- gf->multiply_region.w128 = gf_w128_split_4_128_sse_multiply_region;
+ if(gf_cpu_supports_intel_sse4 && !(h->region_type & GF_REGION_NOSIMD))
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_sse_multiply_region)
else
- gf->multiply_region.w128 = gf_w128_split_4_128_multiply_region;
- #else
- gf->multiply_region.w128 = gf_w128_split_4_128_multiply_region;
#endif
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_split_4_128_multiply_region)
}
}
return 1;
@@ -1586,9 +1584,9 @@ int gf_w128_group_init(gf_t *gf)
gt->m_table[2] = 0;
gt->m_table[3] = 0;
- gf->multiply.w128 = gf_w128_group_multiply;
- gf->inverse.w128 = gf_w128_euclid;
- gf->multiply_region.w128 = gf_w128_group_multiply_region;
+ SET_FUNCTION(gf,multiply,w128,gf_w128_group_multiply)
+ SET_FUNCTION(gf,inverse,w128,gf_w128_euclid)
+ SET_FUNCTION(gf,multiply_region,w128,gf_w128_group_multiply_region)
gf_w128_group_r_init(gf);
@@ -1738,10 +1736,10 @@ int gf_w128_init(gf_t *gf)
}
}
- gf->multiply.w128 = NULL;
- gf->divide.w128 = NULL;
- gf->inverse.w128 = NULL;
- gf->multiply_region.w128 = NULL;
+ SET_FUNCTION(gf,multiply,w128,NULL)
+ SET_FUNCTION(gf,divide,w128,NULL)
+ SET_FUNCTION(gf,inverse,w128,NULL)
+ SET_FUNCTION(gf,multiply_region,w128,NULL)
switch(h->mult_type) {
case GF_MULT_BYTWO_p:
case GF_MULT_BYTWO_b: if (gf_w128_bytwo_init(gf) == 0) return 0; break;
@@ -1757,22 +1755,22 @@ int gf_w128_init(gf_t *gf)
/* Ben: Used to be h->region_type == GF_REGION_ALTMAP, but failed since there
are multiple flags in h->region_type */
if (h->mult_type == GF_MULT_SPLIT_TABLE && (h->region_type & GF_REGION_ALTMAP)) {
- gf->extract_word.w128 = gf_w128_split_extract_word;
+ SET_FUNCTION(gf,extract_word,w128,gf_w128_split_extract_word)
} else if (h->mult_type == GF_MULT_COMPOSITE && h->region_type == GF_REGION_ALTMAP) {
- gf->extract_word.w128 = gf_w128_composite_extract_word;
+ SET_FUNCTION(gf,extract_word,w128,gf_w128_composite_extract_word)
} else {
- gf->extract_word.w128 = gf_w128_extract_word;
+ SET_FUNCTION(gf,extract_word,w128,gf_w128_extract_word)
}
if (h->divide_type == GF_DIVIDE_EUCLID) {
- gf->divide.w128 = gf_w128_divide_from_inverse;
+ SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse)
}
if (gf->inverse.w128 != NULL && gf->divide.w128 == NULL) {
- gf->divide.w128 = gf_w128_divide_from_inverse;
+ SET_FUNCTION(gf,divide,w128,gf_w128_divide_from_inverse)
}
if (gf->inverse.w128 == NULL && gf->divide.w128 != NULL) {
- gf->inverse.w128 = gf_w128_inverse_from_divide;
+ SET_FUNCTION(gf,inverse,w128,gf_w128_inverse_from_divide)
}
return 1;
}