summaryrefslogtreecommitdiff
path: root/src/compiler/shader_info.h
blob: 35655bd07259709d81b5665ce959080039cfcdb6 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#ifndef SHADER_INFO_H
#define SHADER_INFO_H

#include "util/bitset.h"
#include "util/sha1/sha1.h"
#include "shader_enums.h"
#include <stdint.h>

#ifdef __cplusplus
extern "C" {
#endif

#define MAX_XFB_BUFFERS        4
#define MAX_INLINABLE_UNIFORMS 4

struct spirv_supported_capabilities {
   bool address;
   bool amd_fragment_mask;
   bool amd_gcn_shader;
   bool amd_image_gather_bias_lod;
   bool amd_image_read_write_lod;
   bool amd_shader_ballot;
   bool amd_shader_explicit_vertex_parameter;
   bool amd_trinary_minmax;
   bool atomic_storage;
   bool demote_to_helper_invocation;
   bool derivative_group;
   bool descriptor_array_dynamic_indexing;
   bool descriptor_array_non_uniform_indexing;
   bool descriptor_indexing;
   bool device_group;
   bool draw_parameters;
   bool float_controls;
   bool float16_atomic_add;
   bool float16_atomic_min_max;
   bool float16;
   bool float32_atomic_add;
   bool float32_atomic_min_max;
   bool float64_atomic_add;
   bool float64_atomic_min_max;
   bool float64;
   bool fragment_density;
   bool fragment_fully_covered;
   bool fragment_shader_pixel_interlock;
   bool fragment_shader_sample_interlock;
   bool fragment_shading_rate;
   bool generic_pointers;
   bool geometry_streams;
   bool groups;
   bool image_atomic_int64;
   bool image_ms_array;
   bool image_read_without_format;
   bool image_write_without_format;
   bool int16;
   bool int64_atomics;
   bool int64;
   bool int8;
   bool integer_functions2;
   bool kernel_image_read_write;
   bool kernel_image;
   bool kernel;
   bool linkage;
   bool literal_sampler;
   bool mesh_shading_nv;
   bool mesh_shading;
   bool min_lod;
   bool multiview;
   bool per_view_attributes_nv;
   bool physical_storage_buffer_address;
   bool post_depth_coverage;
   bool printf;
   bool ray_cull_mask;
   bool ray_query;
   bool ray_tracing;
   bool ray_traversal_primitive_culling;
   bool ray_tracing_position_fetch;
   bool runtime_descriptor_array;
   bool shader_clock;
   bool shader_viewport_index_layer;
   bool shader_viewport_mask_nv;
   bool sparse_residency;
   bool stencil_export;
   bool storage_16bit;
   bool storage_8bit;
   bool storage_image_ms;
   bool subgroup_arithmetic;
   bool subgroup_ballot;
   bool subgroup_basic;
   bool subgroup_dispatch;
   bool subgroup_quad;
   bool subgroup_rotate;
   bool subgroup_shuffle;
   bool subgroup_uniform_control_flow;
   bool subgroup_vote;
   bool tessellation;
   bool transform_feedback;
   bool variable_pointers;
   bool vk_memory_model_device_scope;
   bool vk_memory_model;
   bool workgroup_memory_explicit_layout;

   bool intel_subgroup_shuffle;
   bool intel_subgroup_buffer_block_io;
};

typedef struct shader_info {
   const char *name;

   /* Descriptive name provided by the client; may be NULL */
   const char *label;

   /* Shader is internal, and should be ignored by things like NIR_DEBUG=print */
   bool internal;

   /* SHA1 of the original source, used by shader detection in drivers. */
   uint8_t source_sha1[SHA1_DIGEST_LENGTH];

   /** The shader stage, such as MESA_SHADER_VERTEX. */
   gl_shader_stage stage:8;

   /** The shader stage in a non SSO linked program that follows this stage,
     * such as MESA_SHADER_FRAGMENT.
     */
   gl_shader_stage next_stage:8;

   /* Number of textures used by this shader */
   uint8_t num_textures;
   /* Number of uniform buffers used by this shader */
   uint8_t num_ubos;
   /* Number of atomic buffers used by this shader */
   uint8_t num_abos;
   /* Number of shader storage buffers (max .driver_location + 1) used by this
    * shader.  In the case of nir_lower_atomics_to_ssbo being used, this will
    * be the number of actual SSBOs in gl_program->info, and the lowered SSBOs
    * and atomic counters in nir_shader->info.
    */
   uint8_t num_ssbos;
   /* Number of images used by this shader */
   uint8_t num_images;

   /* Which inputs are actually read */
   uint64_t inputs_read;
   /* Which outputs are actually written */
   uint64_t outputs_written;
   /* Which outputs are actually read */
   uint64_t outputs_read;
   /* Which system values are actually read */
   BITSET_DECLARE(system_values_read, SYSTEM_VALUE_MAX);

   /* Which I/O is per-primitive, for read/written information combine with
    * the fields above.
    */
   uint64_t per_primitive_inputs;
   uint64_t per_primitive_outputs;

   /* Which I/O is per-view */
   uint64_t per_view_outputs;

   /* Which 16-bit inputs and outputs are used corresponding to
    * VARYING_SLOT_VARn_16BIT.
    */
   uint16_t inputs_read_16bit;
   uint16_t outputs_written_16bit;
   uint16_t outputs_read_16bit;
   uint16_t inputs_read_indirectly_16bit;
   uint16_t outputs_accessed_indirectly_16bit;

   /* Which patch inputs are actually read */
   uint32_t patch_inputs_read;
   /* Which patch outputs are actually written */
   uint32_t patch_outputs_written;
   /* Which patch outputs are read */
   uint32_t patch_outputs_read;

   /* Which inputs are read indirectly (subset of inputs_read) */
   uint64_t inputs_read_indirectly;
   /* Which outputs are read or written indirectly */
   uint64_t outputs_accessed_indirectly;
   /* Which patch inputs are read indirectly (subset of patch_inputs_read) */
   uint64_t patch_inputs_read_indirectly;
   /* Which patch outputs are read or written indirectly */
   uint64_t patch_outputs_accessed_indirectly;

   /** Bitfield of which textures are used */
   BITSET_DECLARE(textures_used, 128);

   /** Bitfield of which textures are used by texelFetch() */
   BITSET_DECLARE(textures_used_by_txf, 128);

   /** Bitfield of which samplers are used */
   BITSET_DECLARE(samplers_used, 32);

   /** Bitfield of which images are used */
   BITSET_DECLARE(images_used, 64);
   /** Bitfield of which images are buffers. */
   BITSET_DECLARE(image_buffers, 64);
   /** Bitfield of which images are MSAA. */
   BITSET_DECLARE(msaa_images, 64);

   /* SPV_KHR_float_controls: execution mode for floating point ops */
   uint16_t float_controls_execution_mode;

   /**
    * Size of shared variables accessed by compute/task/mesh shaders.
    */
   unsigned shared_size;

   /**
    * Size of task payload variables accessed by task/mesh shaders.
    */
   unsigned task_payload_size;

   /**
    * Number of ray tracing queries in the shader (counts all elements of all
    * variables).
    */
   unsigned ray_queries;

   /**
    * Local workgroup size used by compute/task/mesh shaders.
    */
   uint16_t workgroup_size[3];

   enum gl_subgroup_size subgroup_size;

   /**
    * Uses subgroup intrinsics which can communicate across a quad.
    */
   bool uses_wide_subgroup_intrinsics;

   /* Transform feedback buffer strides in dwords, max. 1K - 4. */
   uint8_t xfb_stride[MAX_XFB_BUFFERS];

   uint16_t inlinable_uniform_dw_offsets[MAX_INLINABLE_UNIFORMS];
   uint8_t num_inlinable_uniforms:4;

   /* The size of the gl_ClipDistance[] array, if declared. */
   uint8_t clip_distance_array_size:4;

   /* The size of the gl_CullDistance[] array, if declared. */
   uint8_t cull_distance_array_size:4;

   /* Whether or not this shader ever uses textureGather() */
   bool uses_texture_gather:1;

   /* Whether texture size, levels, or samples is queried. */
   bool uses_resource_info_query:1;

   /**
    * True if this shader uses the fddx/fddy opcodes.
    *
    * Note that this does not include the "fine" and "coarse" variants.
    */
   bool uses_fddx_fddy:1;

   /** Has divergence analysis ever been run? */
   bool divergence_analysis_run:1;

   /* Bitmask of bit-sizes used with ALU instructions. */
   uint8_t bit_sizes_float;
   uint8_t bit_sizes_int;

   /* Whether the first UBO is the default uniform buffer, i.e. uniforms. */
   bool first_ubo_is_default_ubo:1;

   /* Whether or not separate shader objects were used */
   bool separate_shader:1;

   /** Was this shader linked with any transform feedback varyings? */
   bool has_transform_feedback_varyings:1;

   /* Whether flrp has been lowered. */
   bool flrp_lowered:1;

   /* Whether nir_lower_io has been called to lower derefs.
    * nir_variables for inputs and outputs might not be present in the IR.
    */
   bool io_lowered:1;

   /** Has nir_lower_var_copies called. To avoid calling any
    * lowering/optimization that would introduce any copy_deref later.
    */
   bool var_copies_lowered:1;

   /* Whether the shader writes memory, including transform feedback. */
   bool writes_memory:1;

   /* Whether gl_Layer is viewport-relative */
   bool layer_viewport_relative:1;

   /* Whether explicit barriers are used */
   bool uses_control_barrier : 1;
   bool uses_memory_barrier : 1;

   /* Whether ARB_bindless_texture ops or variables are used */
   bool uses_bindless : 1;

   /**
    * Shared memory types have explicit layout set.  Used for
    * SPV_KHR_workgroup_storage_explicit_layout.
    */
   bool shared_memory_explicit_layout:1;

   /**
    * Used for VK_KHR_zero_initialize_workgroup_memory.
    */
   bool zero_initialize_shared_memory:1;

   /**
    * Used for ARB_compute_variable_group_size.
    */
   bool workgroup_size_variable:1;

   /**
     * Set if this shader uses legacy (DX9 or ARB assembly) math rules.
     *
     * From the ARB_fragment_program specification:
     *
     *    "The following rules apply to multiplication:
     *
     *      1. <x> * <y> == <y> * <x>, for all <x> and <y>.
     *      2. +/-0.0 * <x> = +/-0.0, at least for all <x> that correspond to
     *         *representable numbers (IEEE "not a number" and "infinity"
     *         *encodings may be exceptions).
     *      3. +1.0 * <x> = <x>, for all <x>.""
     *
     * However, in effect this was due to DX9 semantics implying that 0*x=0 even
     * for inf/nan if the hardware generated them instead of float_min/max.  So,
     * you should not have an exception for inf/nan to rule 2 above.
     *
     * One implementation of this behavior would be to flush all generated NaNs
     * to zero, at which point 0*Inf=Nan=0.  Most DX9/ARB-asm hardware did not
     * generate NaNs, and the only way the GPU saw one was to possibly feed it
     * in as a uniform.
     */
   bool use_legacy_math_rules;

   union {
      struct {
         /* Which inputs are doubles */
         uint64_t double_inputs;

         /* For AMD-specific driver-internal shaders. It replaces vertex
          * buffer loads with code generating VS inputs from scalar registers.
          *
          * Valid values: SI_VS_BLIT_SGPRS_POS_*
          */
         uint8_t blit_sgprs_amd:4;

         /* True if the shader writes position in window space coordinates pre-transform */
         bool window_space_position:1;

         /** Is an edge flag input needed? */
         bool needs_edge_flag:1;
      } vs;

      struct {
         /** The output primitive type */
         uint16_t output_primitive;

         /** The input primitive type */
         uint16_t input_primitive;

         /** The maximum number of vertices the geometry shader might write. */
         uint16_t vertices_out;

         /** 1 .. MAX_GEOMETRY_SHADER_INVOCATIONS */
         uint8_t invocations;

         /** The number of vertices received per input primitive (max. 6) */
         uint8_t vertices_in:3;

         /** Whether or not this shader uses EndPrimitive */
         bool uses_end_primitive:1;

         /** The streams used in this shaders (max. 4) */
         uint8_t active_stream_mask:4;
      } gs;

      struct {
         bool uses_discard:1;
         bool uses_demote:1;
         bool uses_fbfetch_output:1;
         bool fbfetch_coherent:1;
         bool color_is_dual_source:1;

         /**
          * True if this fragment shader requires helper invocations.  This
          * can be caused by the use of ALU derivative ops, texture
          * instructions which do implicit derivatives, and the use of quad
          * subgroup operations.
          */
         bool needs_quad_helper_invocations:1;

         /**
          * True if this fragment shader requires helper invocations for
          * all subgroup operations, not just quad ops and derivatives.
          */
         bool needs_all_helper_invocations:1;

         /**
          * Whether any inputs are declared with the "sample" qualifier.
          */
         bool uses_sample_qualifier:1;

         /**
          * Whether sample shading is used.
          */
         bool uses_sample_shading:1;

         /**
          * Whether early fragment tests are enabled as defined by
          * ARB_shader_image_load_store.
          */
         bool early_fragment_tests:1;

         /**
          * Defined by INTEL_conservative_rasterization.
          */
         bool inner_coverage:1;

         bool post_depth_coverage:1;

         /**
          * \name ARB_fragment_coord_conventions
          * @{
          */
         bool pixel_center_integer:1;
         bool origin_upper_left:1;
         /*@}*/

         bool pixel_interlock_ordered:1;
         bool pixel_interlock_unordered:1;
         bool sample_interlock_ordered:1;
         bool sample_interlock_unordered:1;

         /**
          * Flags whether NIR's base types on the FS color outputs should be
          * ignored.
          *
          * GLSL requires that fragment shader output base types match the
          * render target's base types for the behavior to be defined.  From
          * the GL 4.6 spec:
          *
          *     "If the values written by the fragment shader do not match the
          *      format(s) of the corresponding color buffer(s), the result is
          *      undefined."
          *
          * However, for NIR shaders translated from TGSI, we don't have the
          * output types any more, so the driver will need to do whatever
          * fixups are necessary to handle effectively untyped data being
          * output from the FS.
          */
         bool untyped_color_outputs:1;

         /** gl_FragDepth layout for ARB_conservative_depth. */
         enum gl_frag_depth_layout depth_layout:3;

         /**
          * Interpolation qualifiers for drivers that lowers color inputs
          * to system values.
          */
         unsigned color0_interp:3; /* glsl_interp_mode */
         bool color0_sample:1;
         bool color0_centroid:1;
         unsigned color1_interp:3; /* glsl_interp_mode */
         bool color1_sample:1;
         bool color1_centroid:1;

         /* Bitmask of gl_advanced_blend_mode values that may be used with this
          * shader.
          */
         unsigned advanced_blend_modes;

         /**
          * Defined by AMD_shader_early_and_late_fragment_tests.
          */
         bool early_and_late_fragment_tests:1;
         enum gl_frag_stencil_layout stencil_front_layout:3;
         enum gl_frag_stencil_layout stencil_back_layout:3;
      } fs;

      struct {
         uint16_t workgroup_size_hint[3];

         uint8_t user_data_components_amd:3;

         /*
          * Arrangement of invocations used to calculate derivatives in a compute
          * shader.  From NV_compute_shader_derivatives.
          */
         enum gl_derivative_group derivative_group:2;

         /*
          * If the shader might run with shared mem on top of `shared_size`.
          */
         bool has_variable_shared_mem:1;

         /**
          * pointer size is:
          *   AddressingModelLogical:    0    (default)
          *   AddressingModelPhysical32: 32
          *   AddressingModelPhysical64: 64
          */
         unsigned ptr_size;
      } cs;

      /* Applies to both TCS and TES. */
      struct {
         enum tess_primitive_mode _primitive_mode;

         /** The number of vertices in the TCS output patch. */
         uint8_t tcs_vertices_out;
         unsigned spacing:2; /*gl_tess_spacing*/

         /** Is the vertex order counterclockwise? */
         bool ccw:1;
         bool point_mode:1;

         /* Bit mask of TCS per-vertex inputs (VS outputs) that are used
          * with a vertex index that is NOT the invocation id
          */
         uint64_t tcs_cross_invocation_inputs_read;

         /* Bit mask of TCS per-vertex outputs that are used
          * with a vertex index that is NOT the invocation id
          */
         uint64_t tcs_cross_invocation_outputs_read;
      } tess;

      /* Applies to MESH and TASK. */
      struct {
         /* Bit mask of MS outputs that are used
          * with an index that is NOT the local invocation index.
          */
         uint64_t ms_cross_invocation_output_access;

         /* Dimensions of task->mesh dispatch (EmitMeshTasksEXT)
          * when they are known compile-time constants.
          * 0 means they are not known.
          */
         uint32_t ts_mesh_dispatch_dimensions[3];

         uint16_t max_vertices_out;
         uint16_t max_primitives_out;
         uint16_t primitive_type;  /* GL_POINTS, GL_LINES or GL_TRIANGLES. */

         /* TODO: remove this when we stop supporting NV_mesh_shader. */
         bool nv;
      } mesh;
   };
} shader_info;

#ifdef __cplusplus
}
#endif

#endif /* SHADER_INFO_H */