summaryrefslogtreecommitdiff
path: root/src/util/u_cpu_detect.h
blob: f9db16f82da39808242eb4422b4f85eac353ec87 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
/**************************************************************************
 *
 * Copyright 2008 Dennis Smit
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.  IN NO EVENT SHALL
 * AUTHORS, COPYRIGHT HOLDERS, AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 ***************************************************************************/

/**
 * @file
 * CPU feature detection.
 *
 * @author Dennis Smit
 * @author Based on the work of Eric Anholt <anholt@FreeBSD.org>
 */

#ifndef _UTIL_CPU_DETECT_H
#define _UTIL_CPU_DETECT_H

#include <stdbool.h>

#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_thread.h"


/* Upper bound on the number of CPUs this header can describe: it sizes both
 * util_affinity_mask (one bit per CPU) and util_cpu_caps_t::cpu_to_L3 (one
 * entry per CPU).
 */
#define UTIL_MAX_CPUS               1024  /* this should be enough */

#ifdef __cplusplus
extern "C" {
#endif

/**
 * CPU family identifier, stored in util_cpu_caps_t::family.
 *
 * The numeric values follow declaration order, so inserting or reordering
 * entries changes the value of every later enumerator.
 */
enum cpu_family {
   CPU_UNKNOWN,

   CPU_AMD_ZEN1_ZEN2,
   CPU_AMD_ZEN_HYGON,
   CPU_AMD_ZEN3,
   CPU_AMD_ZEN_NEXT,
   /* NOTE(review): looks like an end-of-AMD-range marker rather than a real
    * CPU family -- confirm against the users of this enum.
    */
   CPU_AMD_LAST,

   CPU_S390X,
};

/* CPU affinity bitmask: UTIL_MAX_CPUS bits packed into 32-bit words.
 * NOTE(review): the bit-to-CPU mapping (presumably bit N of word N/32 for
 * CPU N) is not shown in this header -- confirm against the code that fills it.
 */
typedef uint32_t util_affinity_mask[UTIL_MAX_CPUS / 32];

/**
 * Description of the host CPU: core counts, family, ISA feature flags and
 * L3 cache topology.
 *
 * Callers obtain a read-only pointer to the single shared instance through
 * util_get_cpu_caps().
 */
struct util_cpu_caps_t {
   /**
    * Number of CPUs available to the process.
    *
    * This will be less than or equal to \c max_cpus.  This is the number of
    * CPUs that are online and available to the process.
    */
   int16_t nr_cpus;

   /**
    * Maximum number of CPUs that can be online in the system.
    *
    * This will be greater than or equal to \c nr_cpus.  This is the number of
    * CPUs installed in the system.  \c nr_cpus will be less if some CPUs are
    * offline.
    */
   int16_t max_cpus;

   /** Detected CPU family; see enum cpu_family. */
   enum cpu_family family;

   /* Feature flags */
   /* NOTE(review): the encoding of x86_cpu_type is not visible in this
    * header -- check the detection code before relying on specific values.
    */
   int x86_cpu_type;
   /* Presumably the cache line size in bytes -- TODO confirm. */
   unsigned cacheline;

   /* Single-bit feature flags.  The names follow the usual ISA extension
    * names (x86 MMX/SSE/AVX/..., PowerPC AltiVec/VSX, ARM NEON, MIPS MSA);
    * the exact condition under which each bit is set is defined by the
    * detection code, which is not part of this header.
    */
   unsigned has_intel:1;
   unsigned has_tsc:1;
   unsigned has_mmx:1;
   unsigned has_mmx2:1;
   unsigned has_sse:1;
   unsigned has_sse2:1;
   unsigned has_sse3:1;
   unsigned has_ssse3:1;
   unsigned has_sse4_1:1;
   unsigned has_sse4_2:1;
   unsigned has_popcnt:1;
   unsigned has_avx:1;
   unsigned has_avx2:1;
   unsigned has_f16c:1;
   unsigned has_fma:1;
   unsigned has_3dnow:1;
   unsigned has_3dnow_ext:1;
   unsigned has_xop:1;
   unsigned has_altivec:1;
   unsigned has_vsx:1;
   unsigned has_daz:1;
   unsigned has_neon:1;
   unsigned has_msa:1;

   /* AVX-512 sub-feature flags, one bit per extension. */
   unsigned has_avx512f:1;
   unsigned has_avx512dq:1;
   unsigned has_avx512ifma:1;
   unsigned has_avx512pf:1;
   unsigned has_avx512er:1;
   unsigned has_avx512cd:1;
   unsigned has_avx512bw:1;
   unsigned has_avx512vl:1;
   unsigned has_avx512vbmi:1;

   /* Number of L3 caches; presumably also the number of entries behind
    * L3_affinity_mask -- TODO confirm.
    */
   unsigned num_L3_caches;
   /* NOTE(review): presumably the number of meaningful bits in each
    * util_affinity_mask -- confirm against the detection code.
    */
   unsigned num_cpu_mask_bits;
   /* Presumably the widest supported SIMD vector width, in bits -- confirm. */
   unsigned max_vector_bits;

   /* Per-CPU table with UTIL_MAX_CPUS entries, presumably mapping a CPU
    * index to the index of its L3 cache -- TODO confirm.  See also
    * U_CPU_INVALID_L3.
    */
   uint16_t cpu_to_L3[UTIL_MAX_CPUS];

   /* Affinity masks for each L3 cache. */
   util_affinity_mask *L3_affinity_mask;
   /**
    * number of "big" CPUs in big.LITTLE configuration
    *
    * a "big" CPU is defined as anything with >= 50% the capacity of the largest CPU,
    * useful for drivers determining how many and what kinds of threads to use
    * example: 1x prime + 3x big + 4x little = 4x "big" cores
    *
    * A value of zero indicates that CPUs are homogeneous.
    */
   int16_t nr_big_cpus;
};

/**
 * Internal state behind util_get_cpu_caps(): the capability table plus the
 * bookkeeping used to run detection lazily.
 *
 * The single instance, _util_cpu_caps_state, is declared extern inside
 * util_get_cpu_caps() and defined outside this header.
 */
struct _util_cpu_caps_state_t {
   /* Passed to call_once() together with _util_cpu_detect_once. */
   once_flag once_flag;
   /**
    * Initialized to 0 and set to non-zero with an atomic after the entire
    * struct has been initialized.
    */
   uint32_t detect_done;
   /* The capability table whose address util_get_cpu_caps() returns. */
   struct util_cpu_caps_t caps;
};

/* Largest value representable in a uint16_t.
 * NOTE(review): presumably the sentinel stored in util_cpu_caps_t::cpu_to_L3
 * for a CPU with no known L3 cache -- its use is not visible in this header,
 * confirm against the detection code.
 */
#define U_CPU_INVALID_L3 0xffff

/**
 * Return a read-only pointer to the shared CPU capability table.
 *
 * If the detect_done flag has not been published yet, detection is triggered
 * through call_once() before the pointer is returned.
 *
 * @return address of the caps member of the global _util_cpu_caps_state.
 */
static inline ATTRIBUTE_CONST const struct util_cpu_caps_t *
util_get_cpu_caps(void)
{
   extern struct _util_cpu_caps_state_t _util_cpu_caps_state;
   extern void _util_cpu_detect_once(void);

   struct _util_cpu_caps_state_t *const state = &_util_cpu_caps_state;

   /* Fast path: on most CPU architectures an atomic read is just an ordinary
    * load plus some compiler magic that keeps code from being re-ordered
    * around it, so the cost of this check should be negligible in most cases.
    *
    * About ATTRIBUTE_CONST: it looks like a bit of a lie, but it is
    * deliberate.  The GCC docs say:
    *
    *    "Calls to functions whose return value is not affected by changes to
    *    the observable state of the program and that have no observable
    *    effects on such state other than to return a value may lend
    *    themselves to optimizations such as common subexpression elimination.
    *    Declaring such functions with the const attribute allows GCC to avoid
    *    emitting some calls in repeated invocations of the function with the
    *    same argument values."
    *
    * "Observable" is the key word.  Apart from a llvmpipe debug flag behind
    * an environment variable and a few unit tests, all of which emulate worse
    * CPUs, this function neither affects nor is affected by any "observable"
    * state.  It does have internal state, but that state is such that the
    * function appears to return exactly the same value, with the same
    * internal data, on every call.
    */
   if (unlikely(!p_atomic_read(&state->detect_done))) {
      call_once(&state->once_flag, _util_cpu_detect_once);
   }

   return &state->caps;
}

#ifdef __cplusplus
}
#endif


#endif /* _UTIL_CPU_DETECT_H */