summaryrefslogtreecommitdiff
path: root/src/gf_cpu.c
blob: fae2cd58431f7cfeb5ece3d942a16102c9bc298f (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
/*
 * GF-Complete: A Comprehensive Open Source Library for Galois Field Arithmetic
 * James S. Plank, Ethan L. Miller, Kevin M. Greenan,
 * Benjamin A. Arnold, John A. Burnum, Adam W. Disney, Allen C. McBride.
 *
 * gf_cpu.c
 *
 * Identifies whether the CPU supports SIMD instructions at runtime.
 */

#include <stdio.h>
#include <stdlib.h>

/* Set to 1 by gf_cpu_identify() once it has run; the x86 and ARM versions
 * return immediately when this is already non-zero. */
int gf_cpu_identified = 0;

/* Runtime feature flags. Each starts at 0 and is set to 1 by
 * gf_cpu_identify() when the corresponding instruction set is detected
 * (and, where checked, not disabled through a GF_COMPLETE_DISABLE_*
 * environment variable). */
int gf_cpu_supports_intel_pclmul = 0;
int gf_cpu_supports_intel_sse4 = 0;
int gf_cpu_supports_intel_ssse3 = 0;
int gf_cpu_supports_intel_sse3 = 0;
int gf_cpu_supports_intel_sse2 = 0;
int gf_cpu_supports_arm_neon = 0;

#if defined(__x86_64__)

#if defined(_MSC_VER)

#define cpuid(info, x)    __cpuidex(info, x, 0)

#elif defined(__GNUC__)

#include <cpuid.h>
/*
 * Executes the CPUID instruction for leaf `InfoType` with sub-leaf 0 and
 * stores the resulting EAX, EBX, ECX and EDX register values in info[0],
 * info[1], info[2] and info[3] respectively.
 */
void cpuid(int info[4], int InfoType){
    int eax = 0;
    int ebx = 0;
    int ecx = 0;
    int edx = 0;

    __cpuid_count(InfoType, 0, eax, ebx, ecx, edx);

    info[0] = eax;
    info[1] = ebx;
    info[2] = ecx;
    info[3] = edx;
}

#else

#error please add a way to detect CPU SIMD support at runtime 

#endif

/*
 * Detects, at most once, which x86 SIMD extensions may be used at runtime.
 *
 * Queries CPUID leaf 1 and, for each instruction set the library was built
 * with (INTEL_* macros), sets the matching gf_cpu_supports_intel_* flag to 1
 * when the CPU reports the feature and the corresponding
 * GF_COMPLETE_DISABLE_* environment variable is not set. When
 * DEBUG_CPU_DETECTION is defined, each enabled feature is printed to stdout.
 * Calls made after the first one return immediately.
 */
void gf_cpu_identify(void)
{
  /* Feature bits reported by CPUID leaf 1 (reg[2] = ECX, reg[3] = EDX). */
  enum {
    GF_CPUID_ECX_SSE3   = 1 << 0,
    GF_CPUID_ECX_PCLMUL = 1 << 1,
    GF_CPUID_ECX_SSSE3  = 1 << 9,
    GF_CPUID_ECX_SSE4_1 = 1 << 19,
    GF_CPUID_ECX_SSE4_2 = 1 << 20,
    GF_CPUID_EDX_SSE2   = 1 << 26
  };

  if (gf_cpu_identified) {
    return;
  }

  int reg[4];

  cpuid(reg, 1);

#if defined(INTEL_SSE4_PCLMUL)
  /* PCLMULQDQ is ECX bit 1; bit 0 is SSE3 and must not be used here. */
  if ((reg[2] & GF_CPUID_ECX_PCLMUL) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE4_PCLMUL")) {
    gf_cpu_supports_intel_pclmul = 1;
#ifdef DEBUG_CPU_DETECTION
    printf("#gf_cpu_supports_intel_pclmul\n");
#endif
  }
#endif

#if defined(INTEL_SSE4)
  /* Either SSE4.2 or SSE4.1 is accepted as "SSE4". */
  if (((reg[2] & GF_CPUID_ECX_SSE4_2) != 0 || (reg[2] & GF_CPUID_ECX_SSE4_1) != 0) && !getenv("GF_COMPLETE_DISABLE_SSE4")) {
    gf_cpu_supports_intel_sse4 = 1;
#ifdef DEBUG_CPU_DETECTION
    printf("#gf_cpu_supports_intel_sse4\n");
#endif
  }
#endif

#if defined(INTEL_SSSE3)
  if ((reg[2] & GF_CPUID_ECX_SSSE3) != 0 && !getenv("GF_COMPLETE_DISABLE_SSSE3")) {
    gf_cpu_supports_intel_ssse3 = 1;
#ifdef DEBUG_CPU_DETECTION
    printf("#gf_cpu_supports_intel_ssse3\n");
#endif
  }
#endif

#if defined(INTEL_SSE3)
  if ((reg[2] & GF_CPUID_ECX_SSE3) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE3")) {
    gf_cpu_supports_intel_sse3 = 1;
#ifdef DEBUG_CPU_DETECTION
    printf("#gf_cpu_supports_intel_sse3\n");
#endif
  }
#endif

#if defined(INTEL_SSE2)
  if ((reg[3] & GF_CPUID_EDX_SSE2) != 0 && !getenv("GF_COMPLETE_DISABLE_SSE2")) {
    gf_cpu_supports_intel_sse2 = 1;
#ifdef DEBUG_CPU_DETECTION
    printf("#gf_cpu_supports_intel_sse2\n");
#endif
  }
#endif

  gf_cpu_identified = 1;
}

#elif defined(__arm__) || defined(__aarch64__)

#ifdef __linux__

#include <stdio.h>
#include <unistd.h>
#include <elf.h>
#include <linux/auxvec.h>
#include <asm/hwcap.h>
#include <fcntl.h>

/*
 * Looks up one entry of this process's ELF auxiliary vector.
 *
 * Reads /proc/self/auxv record by record and returns the value of the first
 * record whose type equals `type` (for example AT_HWCAP). Returns 0 when the
 * file cannot be opened, when reading fails, or when no record matches.
 *
 * NOTE(review): records are parsed as Elf32_auxv_t, which presumably only
 * matches the kernel's layout in a 32-bit process; the only call in this
 * file is on the 32-bit ARM Linux path - confirm before reusing elsewhere.
 */
unsigned long get_hwcap(unsigned long type) {
    unsigned long hwcap = 0;
    int fd = open("/proc/self/auxv", O_RDONLY);

    /* open() signals failure with -1; descriptor 0 is valid (e.g. when
     * stdin has been closed) and must not be treated as an error. */
    if (fd >= 0) {
        Elf32_auxv_t auxv;

        /* Accept only complete records: stop on EOF (0), on error (-1) and
         * on a short read, so a failing read can neither loop forever nor
         * be matched against stale buffer contents. */
        while (read(fd, &auxv, sizeof(auxv)) == (ssize_t)sizeof(auxv)) {
            if (auxv.a_type == type) {
                hwcap = auxv.a_un.a_val;
                break;
            }
        }
        close(fd);
    }

    return hwcap;
}

#endif // linux

/*
 * Detects, at most once, whether ARM NEON may be used at runtime.
 *
 * Only does detection work when the library was built with ARM_NEON and the
 * GF_COMPLETE_DISABLE_NEON environment variable is not set; the result is
 * stored in gf_cpu_supports_arm_neon. When DEBUG_CPU_DETECTION is defined,
 * a line is printed to stdout if NEON ends up enabled. Calls made after the
 * first one return immediately.
 */
void gf_cpu_identify(void)
{
  if (gf_cpu_identified != 0) {
    return;
  }

#if defined(ARM_NEON)
  if (getenv("GF_COMPLETE_DISABLE_NEON") == NULL) {
#if __linux__ && __arm__
    /* 32-bit ARM on Linux: consult the AT_HWCAP auxiliary-vector entry. */
    unsigned long hwcap_bits = get_hwcap(AT_HWCAP);

    gf_cpu_supports_arm_neon = (hwcap_bits & HWCAP_NEON) != 0;
#elif __aarch64__
    /* ASIMD is supported on all aarch64 architectures. */
    gf_cpu_supports_arm_neon = 1;
#else
    /* No reliable runtime check is available here, so NEON is assumed to
     * be present whenever the compiler supports it. */
    gf_cpu_supports_arm_neon = 1;
#endif

#ifdef DEBUG_CPU_DETECTION
    if (gf_cpu_supports_arm_neon != 0) {
      printf("#gf_cpu_supports_arm_neon\n");
    }
#endif
  }
#endif /* defined(ARM_NEON) */

  gf_cpu_identified = 1;
}

#else // defined(__arm__) || defined(__aarch64__)

/*
 * Fallback for targets that are neither x86-64 nor ARM: no SIMD detection
 * is performed, so every gf_cpu_supports_* flag keeps its initial value of
 * 0 and only gf_cpu_identified is set.
 *
 * Declared void so the signature matches the x86 and ARM definitions of
 * this same function in this file; a single prototype cannot be compatible
 * with both an int and a void definition.
 */
void gf_cpu_identify(void)
{
    gf_cpu_identified = 1;
}

#endif