summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Adam Disney <adisney1@vols.utk.edu> 2014-06-06 13:09:04 -0400
committer: Adam Disney <adisney1@vols.utk.edu> 2014-06-06 13:09:04 -0400
commit: 6bb1ebb9f4579d8faafb6124da33178da217a133 (patch)
tree: 7e546c2145749310fe7c11c754ccb8aef2b76d13
parent: 9d53ea590b243170900d82111f6f04d75977e1cb (diff)
download: gf-complete-6bb1ebb9f4579d8faafb6124da33178da217a133.tar.gz
Implemented CARRY_FREE_GK. Sections added are tagged with a comment //ADAM
for easy navigation.
-rw-r--r-- include/gf_complete.h 15
-rw-r--r-- src/gf.c 10
-rw-r--r-- src/gf_method.c 4
-rw-r--r-- src/gf_w32.c 144
-rw-r--r-- tools/gf_methods.c 5
5 files changed, 163 insertions, 15 deletions
diff --git a/include/gf_complete.h b/include/gf_complete.h
index 57b439e..0469b77 100644
--- a/include/gf_complete.h
+++ b/include/gf_complete.h
@@ -33,17 +33,18 @@
Not all are implemented for all values of w.
See the paper for an explanation of how they work. */
-typedef enum {GF_MULT_DEFAULT,
- GF_MULT_SHIFT,
- GF_MULT_CARRY_FREE,
- GF_MULT_GROUP,
+typedef enum {GF_MULT_DEFAULT,
+ GF_MULT_SHIFT,
+ GF_MULT_CARRY_FREE,
+ GF_MULT_CARRY_FREE_GK, //ADAM
+ GF_MULT_GROUP,
GF_MULT_BYTWO_p,
GF_MULT_BYTWO_b,
- GF_MULT_TABLE,
- GF_MULT_LOG_TABLE,
+ GF_MULT_TABLE,
+ GF_MULT_LOG_TABLE,
GF_MULT_LOG_ZERO,
GF_MULT_LOG_ZERO_EXT,
- GF_MULT_SPLIT_TABLE,
+ GF_MULT_SPLIT_TABLE,
GF_MULT_COMPOSITE } gf_mult_type_t;
/* These are the different ways to optimize region
diff --git a/src/gf.c b/src/gf.c
index da714d2..1955559 100644
--- a/src/gf.c
+++ b/src/gf.c
@@ -286,6 +286,16 @@ int gf_error_check(int w, int mult_type, int region_type, int divide_type,
return 1;
}
+ //ADAM
+ if (mult_type == GF_MULT_CARRY_FREE_GK) {
+ if (w != 4 && w != 8 && w != 16 &&
+ w != 32 && w != 64 && w != 128) { _gf_errno = GF_E_CFM___W; return 0; }
+ if (raltmap) { _gf_errno = GF_E_ALT_CFM; return 0; }
+ if (rsse || rnosse) { _gf_errno = GF_E_SSE_CFM; return 0; }
+ if (!pclmul) { _gf_errno = GF_E_PCLMULX; return 0; }
+ return 1;
+ }
+
if (mult_type == GF_MULT_BYTWO_p || mult_type == GF_MULT_BYTWO_b) {
if (raltmap) { _gf_errno = GF_E_ALT_BY2; return 0; }
if (rsse && !sse2) { _gf_errno = GF_E_BY2_SSE; return 0; }
diff --git a/src/gf_method.c b/src/gf_method.c
index a7bcacf..90d62af 100644
--- a/src/gf_method.c
+++ b/src/gf_method.c
@@ -47,6 +47,10 @@ int create_gf_from_argv(gf_t *gf, int w, int argc, char **argv, int starting)
} else if (strcmp(argv[starting], "CARRY_FREE") == 0) {
mult_type = GF_MULT_CARRY_FREE;
starting++;
+ //ADAM
+ } else if (strcmp(argv[starting], "CARRY_FREE_GK") == 0) {
+ mult_type = GF_MULT_CARRY_FREE_GK;
+ starting++;
} else if (strcmp(argv[starting], "GROUP") == 0) {
mult_type = GF_MULT_GROUP;
if (argc < starting + 3) {
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 8f7790c..c90c7fb 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -399,7 +399,94 @@ uint32_t gf_w32_matrix (gf_t *gf, uint32_t b)
extra memory.
*/
+//ADAM
+/* Multiply a32 by b32 using carry-less multiplies.
+
+   The 64-bit carry-free product of the two operands is formed first.  Its
+   upper 32 bits are then multiplied by q+ (first uint64_t in h->private),
+   the upper part of that result is multiplied by g* (second uint64_t in
+   h->private), and the outcome is XORed back into the product.  The low
+   32 bits of the final register are returned.
+
+   When INTEL_SSE4_PCLMUL is not defined the function returns 0. */
+static
+inline
+gf_val_32_t
+gf_w32_cfmgk_multiply (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
+{
+#if defined(INTEL_SSE4_PCLMUL)
+
+  gf_internal_t *h = gf->scratch;
+  uint64_t *consts = (uint64_t *) h->private;
+  uint64_t q_plus = consts[0];
+  uint64_t g_star = consts[1];
+  __m128i zero = _mm_setzero_si128();
+  __m128i va, vb, vq, vg;
+  __m128i prod, fold;
+
+  /* Each operand sits in the low lane of an otherwise zero register. */
+  va = _mm_insert_epi32 (zero, a32, 0);
+  vb = _mm_insert_epi32 (zero, b32, 0);
+  vg = _mm_insert_epi64 (zero, g_star, 0);
+  vq = _mm_insert_epi64 (zero, q_plus, 0);
+
+  prod = _mm_clmulepi64_si128 (va, vb, 0);
+
+  /* Shifting right by 4 bytes drops the low 32 bits before each fold. */
+  fold = _mm_clmulepi64_si128 (vq, _mm_srli_si128 (prod, 4), 0);
+  fold = _mm_clmulepi64_si128 (vg, _mm_srli_si128 (fold, 4), 0);
+  prod = _mm_xor_si128 (prod, fold);
+
+  /* Extracts 32 bit value from result. */
+  return (gf_val_32_t) _mm_extract_epi32 (prod, 0);
+#else
+  return 0;
+#endif
+}
+
+//ADAM
+#if defined(INTEL_SSE4_PCLMUL)
+
+/* Multiply each 32-bit word of src by val and store the products in dest
+   (or XOR them into dest when xor is non-zero).
+
+   val == 0 and val == 1 are delegated to gf_multby_zero / gf_multby_one.
+   Otherwise bytes / sizeof(uint32_t) whole words are processed; any
+   trailing bytes beyond the last whole word are left untouched.  A
+   non-positive word count processes nothing.
+
+   q+ and g* are read from the two uint64_t slots in h->private. */
+static
+void
+gf_w32_cfmgk_multiply_region_from_single(gf_t *gf, void *src, void *dest, uint32_t val, int bytes, int xor)
+{
+
+  int i;
+  int nwords;
+  uint32_t *s32;
+  uint32_t *d32;
+
+  __m128i a, b;
+  __m128i result;
+  __m128i w;
+  __m128i g, q;
+  gf_internal_t * h = gf->scratch;
+  uint64_t g_star, q_plus;
+
+  if (val == 0) { gf_multby_zero(dest, bytes, xor); return; }
+  if (val == 1) { gf_multby_one(src, dest, bytes, xor); return; }
+
+  q_plus = *(uint64_t *) h->private;
+  g_star = *((uint64_t *) h->private + 1);
+
+  /* Build every constant register from an explicitly zeroed vector.
+     Previously g and q were inserted into `a` before `a` had been
+     assigned, which read an indeterminate value. */
+  a = _mm_insert_epi32 (_mm_setzero_si128(), val, 0);
+  g = _mm_insert_epi64 (_mm_setzero_si128(), g_star, 0);
+  q = _mm_insert_epi64 (_mm_setzero_si128(), q_plus, 0);
+  s32 = (uint32_t *) src;
+  d32 = (uint32_t *) dest;
+
+  /* Signed word count, computed once: avoids comparing the int index
+     against a size_t and keeps a negative `bytes` from turning into a
+     huge unsigned bound. */
+  nwords = bytes / (int) sizeof(uint32_t);
+
+  if (xor) {
+    for (i = 0; i < nwords; i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] ^= ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  } else {
+    for (i = 0; i < nwords; i++) {
+      b = _mm_insert_epi32 (a, s32[i], 0);
+      result = _mm_clmulepi64_si128 (a, b, 0);
+      w = _mm_clmulepi64_si128 (q, _mm_srli_si128 (result, 4), 0);
+      w = _mm_clmulepi64_si128 (g, _mm_srli_si128 (w, 4), 0);
+      result = _mm_xor_si128 (result, w);
+      d32[i] = ((gf_val_32_t)_mm_extract_epi32(result, 0));
+    }
+  }
+}
+#endif
static
@@ -446,6 +533,7 @@ gf_w32_clm_multiply_2 (gf_t *gf, gf_val_32_t a32, gf_val_32_t b32)
#endif
return rv;
}
+
static
inline
gf_val_32_t
@@ -552,6 +640,45 @@ gf_w32_shift_multiply (gf_t *gf, uint32_t a32, uint32_t b32)
return product;
}
+//ADAM
+/* Install the CARRY_FREE_GK function pointers for w=32.
+
+   With INTEL_SSE4_PCLMUL defined, the two 64-bit constants used by the
+   multiply routines are written to h->private (q+ first, g* second) and
+   1 is returned.  Without it, the generic region routine is installed and
+   0 is returned.  In both cases the inverse is gf_w32_euclid. */
+static
+int gf_w32_cfmgk_init(gf_t *gf)
+{
+  gf->inverse.w32 = gf_w32_euclid;
+
+#if defined(INTEL_SSE4_PCLMUL)
+  gf_internal_t *h = (gf_internal_t *) gf->scratch;
+  uint64_t *consts = (uint64_t *) h->private;
+  uint64_t quotient = 1ULL << 32;
+  uint64_t rem = h->prim_poly << 32;
+  int bit;
+
+  gf->multiply.w32 = gf_w32_cfmgk_multiply;
+  gf->multiply_region.w32 = gf_w32_cfmgk_multiply_region_from_single;
+
+  /* q+: walk bits 63..32 of the running remainder; every set bit adds a
+     quotient bit and folds in a shifted copy of the polynomial. */
+  for (bit = 63; bit >= 32; bit--) {
+    if (rem & (1ULL << bit)) {
+      quotient |= 1ULL << (bit - 32);
+      rem ^= h->prim_poly << (bit - 32);
+    }
+  }
+  consts[0] = quotient;
+
+  /* g*: the low 32 bits of the polynomial. */
+  consts[1] = h->prim_poly & ((1ULL << 32) - 1);
+
+  return 1;
+#else
+  gf->multiply_region.w32 = gf_w32_multiply_region_from_single;
+
+  return 0;
+#endif
+}
+
static
int gf_w32_cfm_init(gf_t *gf)
{
@@ -2656,6 +2783,10 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
case GF_MULT_CARRY_FREE:
return sizeof(gf_internal_t);
break;
+ //ADAM
+ case GF_MULT_CARRY_FREE_GK:
+ return sizeof(gf_internal_t) + sizeof(uint64_t)*2;
+ break;
case GF_MULT_SHIFT:
return sizeof(gf_internal_t);
break;
@@ -2703,14 +2834,15 @@ int gf_w32_init(gf_t *gf)
gf->multiply_region.w32 = NULL;
switch(h->mult_type) {
- case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
- case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
- case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
+ case GF_MULT_CARRY_FREE: if (gf_w32_cfm_init(gf) == 0) return 0; break;
+ case GF_MULT_CARRY_FREE_GK: if (gf_w32_cfmgk_init(gf) == 0) return 0; break; //ADAM
+ case GF_MULT_SHIFT: if (gf_w32_shift_init(gf) == 0) return 0; break;
+ case GF_MULT_COMPOSITE: if (gf_w32_composite_init(gf) == 0) return 0; break;
case GF_MULT_DEFAULT:
- case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
- case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
+ case GF_MULT_SPLIT_TABLE: if (gf_w32_split_init(gf) == 0) return 0; break;
+ case GF_MULT_GROUP: if (gf_w32_group_init(gf) == 0) return 0; break;
case GF_MULT_BYTWO_p:
- case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
+ case GF_MULT_BYTWO_b: if (gf_w32_bytwo_init(gf) == 0) return 0; break;
default: return 0;
}
if (h->divide_type == GF_DIVIDE_EUCLID) {
diff --git a/tools/gf_methods.c b/tools/gf_methods.c
index 6664bec..921febf 100644
--- a/tools/gf_methods.c
+++ b/tools/gf_methods.c
@@ -20,8 +20,9 @@
#define BNMULTS (8)
static char *BMULTS[BNMULTS] = { "CARRY_FREE", "GROUP48",
"TABLE", "LOG", "SPLIT4", "SPLIT8", "SPLIT88", "COMPOSITE" };
-#define NMULTS (16)
-static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
+//ADAM
+#define NMULTS (17)
+static char *MULTS[NMULTS] = { "SHIFT", "CARRY_FREE", "CARRY_FREE_GK", "GROUP44", "GROUP48", "BYTWO_p", "BYTWO_b",
"TABLE", "LOG", "LOG_ZERO", "LOG_ZERO_EXT", "SPLIT2",
"SPLIT4", "SPLIT8", "SPLIT16", "SPLIT88", "COMPOSITE" };