1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
|
//
// Copyright (C) 2009-2021 Intel Corporation
//
// SPDX-License-Identifier: MIT
//
//
module leaf_builder;
// Kernel bindings for the leaf-building stage. Each `kernel` line gives a
// GRL-side name (used by the dispatch statements in this module) to an entry
// point in bvh_build_leaf.cl, selected through `kernelFunction`.
kernel_module leaf_kernels ("bvh_build_leaf.cl")
{
// Additional library the kernels are linked with
// (NOTE(review): presumably LSC load/store intrinsics, going by the name - confirm).
links lsc_intrinsics;
// Dispatched by buildLeafDXR_quads.
kernel opencl_kernel_primref_to_quads < kernelFunction="primref_to_quads" >;
// Dispatched by buildLeafDXR_procedurals.
kernel opencl_kernel_primref_to_procedurals < kernelFunction="primref_to_procedurals" >;
// Dispatched by buildLeafDXR_instances and buildLeafDXR_instances_indirect.
kernel opencl_kernel_create_HW_instance_nodes < kernelFunction="create_HW_instance_nodes" >;
// Dispatched by buildLeafDXR_instances_pointers and buildLeafDXR_instances_pointers_indirect.
kernel opencl_kernel_create_HW_instance_nodes_pointers < kernelFunction="create_HW_instance_nodes_pointers" >;
}
import struct MKBuilderState "structs.grl";
import struct MKSizeEstimate "structs.grl";
// Divisor used to turn a primitive count into a work-group count for the
// instance-node dispatches (presumably the work-group size of those kernels -
// confirm against bvh_build_leaf.cl).
// The *_indirect metakernels in this file hard-code the derived values
// 15 (Instances_GROUPSIZE - 1) and 4 (log2(Instances_GROUPSIZE)); keep those
// literals in sync if this constant ever changes.
const Instances_GROUPSIZE = 16;
// Direct dispatch of the create_HW_instance_nodes kernel.
//
// The primitive count arrives as the metakernel argument `numPrims`, so the
// number of work groups is expressed directly as
// ceil(numPrims / Instances_GROUPSIZE) and passed to `dispatch`.
metakernel buildLeafDXR_instances(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArray,
    dword stride,
    dword offset,
    dword numPrims)
{
    // Round up so that a partially filled last group is still launched.
    define instance_wg_count (numPrims+Instances_GROUPSIZE-1)/Instances_GROUPSIZE;

    dispatch opencl_kernel_create_HW_instance_nodes(instance_wg_count,1,1) args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArray,
        stride, offset);
}
// Indirect dispatch of the create_HW_instance_nodes kernel.
//
// The primitive count is read from the first dword at `indirectBuildRangeInfo`.
// The work-group count ceil(count / 16) is then computed in registers and
// written to the DISPATCHDIM registers consumed by `dispatch_indirect`.
metakernel buildLeafDXR_instances_indirect(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArray,
    qword indirectBuildRangeInfo,
    dword stride,
    dword offset)
{
    define wg_count       REG0;
    define round_up_bias  REG1; // Instances_GROUPSIZE - 1
    define log2_groupsize REG2; // log_2(Instances_GROUPSIZE)

    // Constants for the round-up division; the division itself is done
    // with a right shift.
    round_up_bias  = 15;
    log2_groupsize = 4;

    // Start from primitiveCount, then compute (count + 15) >> 4.
    wg_count = load_dword(indirectBuildRangeInfo);
    wg_count = wg_count + round_up_bias;
    wg_count = wg_count >> log2_groupsize;

    // One-dimensional grid: only X varies.
    DISPATCHDIM_X = wg_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_create_HW_instance_nodes args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArray,
        stride, offset);
}
// Direct dispatch of the create_HW_instance_nodes_pointers kernel.
//
// Same shape as buildLeafDXR_instances, but the instance source is passed as
// `srcInstanceDescrArrayPtr` (NOTE(review): presumably an array of pointers to
// instance descriptors rather than the descriptors themselves - confirm).
metakernel buildLeafDXR_instances_pointers(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArrayPtr,
    dword stride,
    dword offset,
    dword numPrims)
{
    // ceil(numPrims / Instances_GROUPSIZE): round up so that a partially
    // filled last group is still launched.
    define instance_wg_count (numPrims+Instances_GROUPSIZE-1)/Instances_GROUPSIZE;

    dispatch opencl_kernel_create_HW_instance_nodes_pointers(instance_wg_count,1,1) args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArrayPtr,
        stride, offset);
}
// Indirect dispatch of the create_HW_instance_nodes_pointers kernel.
//
// The primitive count is read from the first dword at `indirectBuildRangeInfo`.
// The work-group count ceil(count / 16) is then computed in registers and
// written to the DISPATCHDIM registers consumed by `dispatch_indirect`.
metakernel buildLeafDXR_instances_pointers_indirect(
    MKBuilderState state,
    qword build_primref_index_buffers,
    qword srcInstanceDescrArrayPtr,
    qword indirectBuildRangeInfo,
    dword stride,
    dword offset)
{
    define wg_count       REG0;
    define round_up_bias  REG1; // Instances_GROUPSIZE - 1
    define log2_groupsize REG2; // log_2(Instances_GROUPSIZE)

    // Constants for the round-up division; the division itself is done
    // with a right shift.
    round_up_bias  = 15;
    log2_groupsize = 4;

    // Start from primitiveCount, then compute (count + 15) >> 4.
    wg_count = load_dword(indirectBuildRangeInfo);
    wg_count = wg_count + round_up_bias;
    wg_count = wg_count >> log2_groupsize;

    // One-dimensional grid: only X varies.
    DISPATCHDIM_X = wg_count.lo;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_create_HW_instance_nodes_pointers args(
        state.build_globals,
        build_primref_index_buffers,
        state.build_primref_buffer,
        state.bvh_buffer,
        srcInstanceDescrArrayPtr,
        stride, offset);
}
// Indirect dispatch of the primref_to_procedurals kernel.
//
// The primitive count is loaded from the dword at `p_numPrimitives`, and the
// grid width is ceil(count / 16), computed as ((count + 16) - 1) >> 4.
metakernel buildLeafDXR_procedurals(
    MKBuilderState state,
    qword build_primref_index_buffers,
    dword stride,
    dword offset,
    qword p_numPrimitives)
{
    define const_one         REG0;
    define prims_per_wg      REG1;
    define prims_per_wg_log2 REG2;
    define prim_count        REG3;
    define wg_count          REG4;

    const_one         = 1;
    prims_per_wg      = 16;
    // Division is not available here, so the divide by 16 primitives per
    // work group is done as a right shift by 4.
    prims_per_wg_log2 = 4;

    prim_count = load_dword(p_numPrimitives);

    // Round up: (count + prims_per_wg - 1) / prims_per_wg.
    wg_count = prim_count + prims_per_wg;
    wg_count = wg_count - const_one;
    wg_count = wg_count >> prims_per_wg_log2;

    // One-dimensional grid: only X varies.
    DISPATCHDIM_X = wg_count;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_primref_to_procedurals args(
        state.build_globals,
        state.build_primref_buffer,
        build_primref_index_buffers,
        state.bvh_buffer,
        state.geomDesc_buffer,
        stride, offset);
}
// Indirect dispatch of the primref_to_quads kernel.
//
// The primitive count is loaded from the dword at `p_numPrimitives`, and the
// grid width is ceil(count / 32), computed as (((count + 32) - 1) >> 4) >> 1.
// `allow_update` is forwarded to the kernel unchanged.
metakernel buildLeafDXR_quads(
    MKBuilderState state,
    qword build_primref_index_buffers,
    dword stride,
    dword offset,
    qword p_numPrimitives,
    dword allow_update)
{
    define const_one    REG0;
    define prims_per_wg REG1;
    define shift_by_4   REG2;
    define prim_count   REG3;
    define wg_count     REG4;

    const_one    = 1;
    prims_per_wg = 32;
    // Division is not available here, so right shifts are used instead.
    // This register holds only the first shift amount (4, i.e. divide by 16);
    // the remaining divide by 2 reuses const_one as the shift amount below.
    shift_by_4   = 4;

    prim_count = load_dword(p_numPrimitives);

    // Round up: (count + prims_per_wg - 1) / prims_per_wg.
    wg_count = prim_count + prims_per_wg;
    wg_count = wg_count - const_one;
    // Total shift is 4 + 1 = 5, i.e. divide by 32 primitives per work group.
    // NOTE(review): the split into two shifts is presumably because the shift
    // amount must be a power of two - confirm against the ALU documentation.
    wg_count = wg_count >> shift_by_4;
    wg_count = wg_count >> const_one;

    // One-dimensional grid: only X varies.
    DISPATCHDIM_X = wg_count;
    DISPATCHDIM_Y = 1;
    DISPATCHDIM_Z = 1;

    dispatch_indirect opencl_kernel_primref_to_quads args(
        state.build_globals,
        state.build_primref_buffer,
        build_primref_index_buffers,
        state.bvh_buffer,
        state.geomDesc_buffer,
        stride, offset,
        allow_update);
}
|