summary | refs | log | tree | commit | diff
path: root/src/gf_w32.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/gf_w32.c')
-rw-r--r-- src/gf_w32.c 84
1 files changed, 22 insertions, 62 deletions
diff --git a/src/gf_w32.c b/src/gf_w32.c
index 5ec2aa7..2e187fd 100644
--- a/src/gf_w32.c
+++ b/src/gf_w32.c
@@ -12,59 +12,7 @@
#include "gf_int.h"
#include <stdio.h>
#include <stdlib.h>
-
-#define GF_FIELD_WIDTH (32)
-#define GF_FIRST_BIT (1 << 31)
-
-#define GF_BASE_FIELD_WIDTH (16)
-#define GF_BASE_FIELD_SIZE (1 << GF_BASE_FIELD_WIDTH)
-#define GF_BASE_FIELD_GROUP_SIZE GF_BASE_FIELD_SIZE-1
-#define GF_MULTBY_TWO(p) (((p) & GF_FIRST_BIT) ? (((p) << 1) ^ h->prim_poly) : (p) << 1)
-
-struct gf_split_2_32_lazy_data {
- uint32_t tables[16][4];
- uint32_t last_value;
-};
-
-struct gf_w32_split_8_8_data {
- uint32_t tables[7][256][256];
- uint32_t region_tables[4][256];
- uint32_t last_value;
-};
-
-struct gf_w32_group_data {
- uint32_t *reduce;
- uint32_t *shift;
- int tshift;
- uint64_t rmask;
- uint32_t *memory;
-};
-
-struct gf_split_16_32_lazy_data {
- uint32_t tables[2][(1<<16)];
- uint32_t last_value;
-};
-
-struct gf_split_8_32_lazy_data {
- uint32_t tables[4][256];
- uint32_t last_value;
-};
-
-struct gf_split_4_32_lazy_data {
- uint32_t tables[8][16];
- uint32_t last_value;
-};
-
-struct gf_w32_bytwo_data {
- uint64_t prim_poly;
- uint64_t mask1;
- uint64_t mask2;
-};
-
-struct gf_w32_composite_data {
- uint16_t *log;
- uint16_t *alog;
-};
+#include "gf_w32.h"
#define MM_PRINT32(s, r) { uint8_t blah[16], ii; printf("%-12s", s); _mm_storeu_si128((__m128i *)blah, r); for (ii = 0; ii < 16; ii += 4) printf(" %02x%02x%02x%02x", blah[15-ii], blah[14-ii], blah[13-ii], blah[12-ii]); printf("\n"); }
@@ -1434,25 +1382,25 @@ int gf_w32_bytwo_init(gf_t *gf)
if (h->mult_type == GF_MULT_BYTWO_p) {
gf->multiply.w32 = gf_w32_bytwo_p_multiply;
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSSE)
+ if (h->region_type & GF_REGION_NOSIMD)
gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w32_bytwo_p_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w32_bytwo_p_nosse_multiply_region;
- if(h->region_type & GF_REGION_SSE)
+ if(h->region_type & GF_REGION_SIMD)
return 0;
#endif
} else {
gf->multiply.w32 = gf_w32_bytwo_b_multiply;
#ifdef INTEL_SSE2
- if (h->region_type & GF_REGION_NOSSE)
+ if (h->region_type & GF_REGION_NOSIMD)
gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region;
else
gf->multiply_region.w32 = gf_w32_bytwo_b_sse_multiply_region;
#else
gf->multiply_region.w32 = gf_w32_bytwo_b_nosse_multiply_region;
- if(h->region_type & GF_REGION_SSE)
+ if(h->region_type & GF_REGION_SIMD)
return 0;
#endif
}
@@ -2283,6 +2231,7 @@ int gf_w32_split_init(gf_t *gf)
struct gf_split_16_32_lazy_data *d16;
uint32_t p, basep;
int i, j, exp, ispclmul, issse3;
+ int isneon = 0;
#if defined(INTEL_SSE4_PCLMUL)
ispclmul = 1;
@@ -2295,6 +2244,9 @@ int gf_w32_split_init(gf_t *gf)
#else
issse3 = 0;
#endif
+#ifdef ARM_NEON
+ isneon = 1;
+#endif
h = (gf_internal_t *) gf->scratch;
@@ -2335,13 +2287,13 @@ int gf_w32_split_init(gf_t *gf)
ld2 = (struct gf_split_2_32_lazy_data *) h->private;
ld2->last_value = 0;
#ifdef INTEL_SSSE3
- if (!(h->region_type & GF_REGION_NOSSE))
+ if (!(h->region_type & GF_REGION_NOSIMD))
gf->multiply_region.w32 = gf_w32_split_2_32_lazy_sse_multiply_region;
else
gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region;
#else
gf->multiply_region.w32 = gf_w32_split_2_32_lazy_multiply_region;
- if(h->region_type & GF_REGION_SSE) return 0;
+ if(h->region_type & GF_REGION_SIMD) return 0;
#endif
return 1;
}
@@ -2349,11 +2301,15 @@ int gf_w32_split_init(gf_t *gf)
/* 4/32 or Default + SSE - There is no ALTMAP/NOSSE. */
if ((h->arg1 == 4 && h->arg2 == 32) || (h->arg1 == 32 && h->arg2 == 4) ||
- (issse3 && h->mult_type == GF_REGION_DEFAULT)) {
+ ((issse3 || isneon) && h->mult_type == GF_REGION_DEFAULT)) {
ld4 = (struct gf_split_4_32_lazy_data *) h->private;
ld4->last_value = 0;
- if ((h->region_type & GF_REGION_NOSSE) || !issse3) {
+ if ((h->region_type & GF_REGION_NOSIMD) || !(issse3 || isneon)) {
gf->multiply_region.w32 = gf_w32_split_4_32_lazy_multiply_region;
+ } else if (isneon) {
+#ifdef ARM_NEON
+ gf_w32_neon_split_init(gf);
+#endif
} else if (h->region_type & GF_REGION_ALTMAP) {
gf->multiply_region.w32 = gf_w32_split_4_32_lazy_sse_altmap_multiply_region;
} else {
@@ -2731,10 +2687,14 @@ int gf_w32_composite_init(gf_t *gf)
int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg1, int arg2)
{
int issse3 = 0;
+ int isneon = 0;
#ifdef INTEL_SSSE3
issse3 = 1;
#endif
+#ifdef ARM_NEON
+ isneon = 1;
+#endif
switch(mult_type)
{
@@ -2760,7 +2720,7 @@ int gf_w32_scratch_size(int mult_type, int region_type, int divide_type, int arg
return sizeof(gf_internal_t) + sizeof(struct gf_split_2_32_lazy_data) + 64;
}
if ((arg1 == 8 && arg2 == 32) || (arg2 == 8 && arg1 == 32) ||
- (mult_type == GF_MULT_DEFAULT && !issse3)) {
+ (mult_type == GF_MULT_DEFAULT && !(issse3 || isneon))) {
return sizeof(gf_internal_t) + sizeof(struct gf_split_8_32_lazy_data) + 64;
}
if ((arg1 == 4 && arg2 == 32) ||