/*
* Copyright © 2020 Google, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice (including the next
* paragraph) shall be included in all copies or substantial portions of the
* Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*/
#ifndef _U_TRACE_H
#define _U_TRACE_H
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include "util/macros.h"
#include "util/u_atomic.h"
#include "util/u_queue.h"
#ifdef __cplusplus
extern "C" {
#endif
/* A trace mechanism (very) loosely inspired by the linux kernel tracepoint
* mechanism, in that it allows for defining driver specific (or common)
* tracepoints, which generate 'trace_$name()' functions that can be
* called at various points in commandstream emit.
*
* Currently a printf backend is implemented, but the expectation is to
* also implement a perfetto backend for shipping out traces to a tool like
* AGI.
*
* Notable differences:
*
* - GPU timestamps! A driver provided callback is used to emit timestamps
* to a buffer. At a later point in time (when stalling to wait for the
* GPU is not required), the timestamps are re-united with the trace
* payload. This makes the trace mechanism suitable for profiling.
*
* - Instead of a systemwide trace ringbuffer, buffering of un-retired
* tracepoints is split into two stages. Traces are emitted to a
* 'u_trace' instance, and at a later time flushed to a 'u_trace_context'
* instance. This avoids the requirement that commandstream containing
* tracepoints is emitted in the same order as it is generated.
*
* If the hw has multiple parallel "engines" (for example, 3d/blit/compute)
* then a `u_trace_context` per-engine should be used.
*
* - Unlike kernel tracepoints, u_trace tracepoints are defined in Python,
* from which the header and source files are generated. Since we already
* have a build dependency on python+mako, this gives more flexibility
* than clunky preprocessor macro magic.
*
*/
/* Forward declarations so that the callback typedefs below can refer to
 * these types by pointer.  u_trace_context and u_trace are defined later
 * in this header; u_trace_chunk and u_trace_printer are only used through
 * pointers in the visible part of this header.
 */
struct u_trace_context;
struct u_trace;
struct u_trace_chunk;
struct u_trace_printer;
/**
 * Special reserved value to indicate that no timestamp was captured,
 * and that the timestamp of the previous trace should be reused.
 *
 * A u_trace_read_ts callback may return this value instead of a real
 * timestamp.  Because the reserved value is zero, a genuine timestamp of
 * zero cannot be distinguished from "no timestamp".
 */
#define U_TRACE_NO_TIMESTAMP ((uint64_t) 0)
/**
 * Driver provided callback to create a timestamp buffer which will be
 * read by u_trace_read_ts function.
 *
 * utctx is the trace context the buffer is created for, and
 * timestamps_count is the number of timestamps requested.
 *
 * Returns an opaque, driver-defined pointer.  NOTE(review): presumably this
 * is the value later handed back as the `timestamps` argument of the
 * record/read/delete callbacks, and presumably each slot holds one 64b
 * timestamp -- confirm against the implementation.
 */
typedef void *(*u_trace_create_ts_buffer)(struct u_trace_context *utctx,
uint32_t timestamps_count);
/**
 * Driver provided callback to delete a timestamp buffer.
 *
 * utctx is the owning trace context.  NOTE(review): `timestamps` is
 * presumably a buffer previously returned by the u_trace_create_ts_buffer
 * callback -- confirm against the implementation.
 */
typedef void (*u_trace_delete_ts_buffer)(struct u_trace_context *utctx,
void *timestamps);
/**
 * Driver provided callback to emit commands into the specified command
 * stream to capture a 64b timestamp into the specified timestamps buffer,
 * at the specified index.
 *
 * The hw counter that the driver records should be something that runs at
 * a fixed rate, even as the GPU freq changes.  The same source used for
 * GL_TIMESTAMP queries should be appropriate.
 *
 * ut is the u_trace the tracepoint is being emitted to, cs is the
 * driver-defined command stream to emit into, timestamps is the
 * driver-defined timestamp buffer and idx is the slot within that buffer.
 *
 * NOTE(review): end_of_pipe is not described anywhere in this header;
 * presumably it asks the driver to capture the timestamp once preceding GPU
 * work has completed rather than when the command is parsed -- confirm
 * against the drivers implementing this callback.
 */
typedef void (*u_trace_record_ts)(struct u_trace *ut,
void *cs,
void *timestamps,
unsigned idx,
bool end_of_pipe);
/**
 * Driver provided callback to read back a previously recorded timestamp.
 * If necessary, this should block until the GPU has finished writing back
 * the timestamps.  (The timestamps will be read back in order, so it is
 * safe to only synchronize on idx==0.)
 *
 * utctx is the trace context, timestamps is the driver-defined timestamp
 * buffer and idx is the slot to read.
 *
 * flush_data is data provided by the driver via u_trace_flush.
 *
 * The returned timestamp should be in units of nanoseconds.  The same
 * timebase as GL_TIMESTAMP queries should be used.
 *
 * The driver can return the special U_TRACE_NO_TIMESTAMP value to indicate
 * that no timestamp was captured and the timestamp from the previous trace
 * will be re-used.  (The first trace in the u_trace buf may not do this.)
 * This allows the driver to detect cases where multiple tracepoints are
 * emitted with no other intervening cmdstream, to avoid pointlessly
 * capturing the same timestamp multiple times in a row.
 */
typedef uint64_t (*u_trace_read_ts)(struct u_trace_context *utctx,
void *timestamps,
unsigned idx,
void *flush_data);
/**
 * Driver provided callback to delete flush data.
 *
 * flush_data is the driver-defined pointer that was handed to
 * u_trace_flush(); utctx is the trace context it was flushed to.
 */
typedef void (*u_trace_delete_flush_data)(struct u_trace_context *utctx,
void *flush_data);
/**
 * Bit flags selecting which kinds of tracing are enabled.  A combination of
 * these is stored in u_trace_context::enabled_traces and tested by the
 * inline helpers at the end of this header.
 */
enum u_trace_type {
/* Individual flags, one bit each.
 * NOTE(review): the exact meaning of PRINT, JSON and the two PERFETTO bits
 * is not spelled out in this header; presumably they select the text
 * output, its JSON formatting, and perfetto tracing being currently active
 * vs. requested through the environment -- confirm against u_trace.c.
 */
U_TRACE_TYPE_PRINT = 1u << 0,
U_TRACE_TYPE_JSON = 1u << 1,
U_TRACE_TYPE_PERFETTO_ACTIVE = 1u << 2,
U_TRACE_TYPE_PERFETTO_ENV = 1u << 3,
/* Tested by u_trace_markers_enabled(): emit markers into the command
 * stream even if the queue isn't active.
 */
U_TRACE_TYPE_MARKERS = 1u << 4,
/* Convenience combinations of the single-bit flags above. */
U_TRACE_TYPE_PRINT_JSON = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_JSON,
U_TRACE_TYPE_PERFETTO =
U_TRACE_TYPE_PERFETTO_ACTIVE | U_TRACE_TYPE_PERFETTO_ENV,
/*
 * A mask of traces that require appending to the tracepoint chunk list.
 * Note that this includes both PERFETTO bits.
 */
U_TRACE_TYPE_REQUIRE_QUEUING = U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO,
/*
 * A mask of traces that require processing the tracepoint chunk list.
 * Unlike the queuing mask, only PERFETTO_ACTIVE is included here, not
 * PERFETTO_ENV.
 */
U_TRACE_TYPE_REQUIRE_PROCESSING =
U_TRACE_TYPE_PRINT | U_TRACE_TYPE_PERFETTO_ACTIVE,
};
/**
 * The trace context provides tracking for "in-flight" traces, once the
 * cmdstream that records timestamps has been flushed.
 */
struct u_trace_context {
/* All traces enabled in this context (mask of enum u_trace_type bits).
 * Read with p_atomic_read_relaxed() by the inline helpers in this header.
 */
enum u_trace_type enabled_traces;
/* Opaque pointer; presumably the `pctx` passed to u_trace_context_init()
 * -- confirm against the implementation.
 */
void *pctx;
/* Driver callbacks; see the corresponding typedefs for their contracts. */
u_trace_create_ts_buffer create_timestamp_buffer;
u_trace_delete_ts_buffer delete_timestamp_buffer;
u_trace_record_ts record_timestamp;
u_trace_read_ts read_timestamp;
u_trace_delete_flush_data delete_flush_data;
/* NOTE(review): presumably the stream and printer used for text trace
 * output -- confirm against u_trace.c.
 */
FILE *out;
struct u_trace_printer *out_printer;
/* Once u_trace_flush() is called u_trace_chunk's are queued up to
 * render tracepoints on a queue.  The per-chunk queue jobs block until
 * timestamps are available.
 */
struct util_queue queue;
#ifdef HAVE_PERFETTO
/* node in global list of trace contexts. */
struct list_head node;
#endif
/* State to accumulate time across N chunks associated with a single
 * batch (u_trace).
 */
uint64_t last_time_ns;
uint64_t first_time_ns;
/* NOTE(review): presumably running frame/batch/event counters used when
 * emitting trace output -- confirm against u_trace.c.
 */
uint32_t frame_nr;
uint32_t batch_nr;
uint32_t event_nr;
bool start_of_frame;
/* list of unprocessed trace chunks in fifo order: */
struct list_head flushed_trace_chunks;
};
/**
 * The u_trace ptr is passed as the first arg to generated tracepoints.
 * It provides buffering for tracepoint payload until the corresponding
 * driver cmdstream containing the emitted commands to capture is
 * flushed.
 *
 * Individual tracepoints emitted to u_trace are expected to be "executed"
 * (ie. timestamp captured) in FIFO order with respect to other tracepoints
 * emitted to the same u_trace.  But the order WRT other u_trace instances
 * is undefined until u_trace_flush().
 */
struct u_trace {
/* Parent trace context that u_trace_flush() flushes into. */
struct u_trace_context *utctx;
struct list_head
trace_chunks; /* list of unflushed trace chunks in fifo order */
};
/**
 * Initialize a trace context with the driver provided callbacks.  See the
 * callback typedefs above for what each callback is expected to do.
 *
 * pctx is an opaque pointer supplied by the caller.  NOTE(review): the name
 * suggests the driver's pipe context -- confirm against callers.
 */
void u_trace_context_init(struct u_trace_context *utctx,
void *pctx,
u_trace_create_ts_buffer create_timestamp_buffer,
u_trace_delete_ts_buffer delete_timestamp_buffer,
u_trace_record_ts record_timestamp,
u_trace_read_ts read_timestamp,
u_trace_delete_flush_data delete_flush_data);
/**
 * Counterpart of u_trace_context_init().  NOTE(review): presumably releases
 * everything the context owns; what happens to still-unprocessed chunks is
 * not visible from this header.
 */
void u_trace_context_fini(struct u_trace_context *utctx);
/**
 * Flush (trigger processing) of traces previously flushed to the
 * trace-context by u_trace_flush().
 *
 * This should typically be called in the driver's pctx->flush().
 *
 * NOTE(review): `eof` is not described in this header; presumably it marks
 * the end of a frame -- confirm against the implementation.
 */
void u_trace_context_process(struct u_trace_context *utctx, bool eof);
/* Set up / tear down a u_trace that emits into the given trace context. */
void u_trace_init(struct u_trace *ut, struct u_trace_context *utctx);
void u_trace_fini(struct u_trace *ut);
/* NOTE(review): presumably one-time setup of global tracing state (takes no
 * context argument) -- confirm against the implementation.
 */
void u_trace_state_init(void);
/* NOTE(review): takes only a type mask and no context, so presumably this
 * queries a global enable state rather than a specific context -- confirm.
 */
bool u_trace_is_enabled(enum u_trace_type type);
/* Whether any tracepoints have been emitted to `ut` (per the name; the
 * implementation is not visible here).
 */
bool u_trace_has_points(struct u_trace *ut);
/**
 * Position within the tracepoints of a u_trace: the u_trace itself, a chunk
 * and an event index.  Passed by value; used to delimit ranges for
 * u_trace_clone_append() and u_trace_disable_event_range().
 */
struct u_trace_iterator {
struct u_trace *ut;
struct u_trace_chunk *chunk;
/* NOTE(review): presumably an index into `chunk` -- confirm. */
uint32_t event_idx;
};
/* Iterators for the start and end of the tracepoints currently in `ut`.
 * NOTE(review): whether the end iterator is one-past-the-last is not
 * visible from this header.
 */
struct u_trace_iterator u_trace_begin_iterator(struct u_trace *ut);
struct u_trace_iterator u_trace_end_iterator(struct u_trace *ut);
/* Compare two iterators for equality. */
bool u_trace_iterator_equal(struct u_trace_iterator a,
struct u_trace_iterator b);
/**
 * Driver provided callback, passed to u_trace_clone_append(), to copy
 * timestamps on the GPU from one timestamp buffer to another.
 *
 * cmdstream is the driver-defined command stream.  `count` entries are
 * copied from ts_from starting at from_offset to ts_to starting at
 * to_offset.  NOTE(review): offsets and count are presumably in units of
 * timestamps rather than bytes -- confirm against the implementation.
 */
typedef void (*u_trace_copy_ts_buffer)(struct u_trace_context *utctx,
void *cmdstream,
void *ts_from,
uint32_t from_offset,
void *ts_to,
uint32_t to_offset,
uint32_t count);
/**
 * Clones tracepoints range into target u_trace.
 * Provides callback for driver to copy timestamps on GPU from
 * one buffer to another.
 *
 * It allows:
 * - Tracing re-usable command buffer in Vulkan, by copying tracepoints
 *   each time it is submitted.
 * - Per-tile tracing for tiling GPUs, by copying a range of tracepoints
 *   corresponding to a tile.
 *
 * begin_it and end_it delimit the range to clone, `into` is the target
 * u_trace, and cmdstream is passed through to copy_ts_buffer.
 * NOTE(review): whether end_it is inclusive or exclusive is not visible
 * from this header.
 */
void u_trace_clone_append(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it,
struct u_trace *into,
void *cmdstream,
u_trace_copy_ts_buffer copy_ts_buffer);
/* Disable the events in the range delimited by begin_it and end_it.
 * NOTE(review): what "disabled" means for later processing, and whether
 * end_it is inclusive, is not visible from this header -- confirm against
 * the implementation.
 */
void u_trace_disable_event_range(struct u_trace_iterator begin_it,
struct u_trace_iterator end_it);
/**
 * Flush traces to the parent trace-context.  At this point, the expectation
 * is that all the tracepoints are "executed" by the GPU following any
 * previously flushed u_trace batch.
 *
 * flush_data is a way for driver to pass additional data, which becomes
 * available only at the point of flush, to the u_trace_read_ts callback and
 * perfetto.  The typical example of such data would be a fence to wait on in
 * u_trace_read_ts, and a submission_id to pass into perfetto.  The
 * destruction of the data is done via u_trace_delete_flush_data.
 *
 * NOTE(review): free_data is not described in this header; presumably it
 * controls whether flush_data is destroyed through the
 * u_trace_delete_flush_data callback once it is no longer needed -- confirm
 * against the implementation.
 *
 * This should typically be called when the corresponding cmdstream
 * (containing the timestamp reads) is flushed to the kernel.
 */
void u_trace_flush(struct u_trace *ut, void *flush_data, bool free_data);
#ifdef HAVE_PERFETTO
/**
 * Return whether the U_TRACE_TYPE_PERFETTO_ACTIVE bit is set in the
 * context's enabled_traces mask.  The mask is read through
 * p_atomic_read_relaxed().
 */
static ALWAYS_INLINE bool
u_trace_perfetto_active(struct u_trace_context *utctx)
{
   const unsigned enabled = p_atomic_read_relaxed(&utctx->enabled_traces);

   return (enabled & U_TRACE_TYPE_PERFETTO_ACTIVE) != 0;
}

/* Only declared when built with HAVE_PERFETTO; the implementations are not
 * part of this header.
 */
void u_trace_perfetto_start(void);
void u_trace_perfetto_stop(void);
#else
/* Built without HAVE_PERFETTO: perfetto tracing is never reported active. */
static ALWAYS_INLINE bool
u_trace_perfetto_active(UNUSED struct u_trace_context *utctx)
{
   return false;
}
#endif
/**
 * Return whether utrace is enabled at all or not, i.e. whether any bit is
 * set in the context's enabled_traces mask.  This can be used to gate any
 * expensive traces.
 */
static ALWAYS_INLINE bool
u_trace_enabled(struct u_trace_context *utctx)
{
   const unsigned enabled = p_atomic_read_relaxed(&utctx->enabled_traces);

   return enabled != 0;
}
/**
 * Return whether chunks should be processed or not: true when any bit of
 * U_TRACE_TYPE_REQUIRE_PROCESSING is set in the context's enabled_traces
 * mask.
 */
static ALWAYS_INLINE bool
u_trace_should_process(struct u_trace_context *utctx)
{
   const unsigned enabled = p_atomic_read_relaxed(&utctx->enabled_traces);

   return (enabled & U_TRACE_TYPE_REQUIRE_PROCESSING) != 0;
}
/**
 * Return whether to emit markers into the command stream even if the queue
 * isn't active, i.e. whether U_TRACE_TYPE_MARKERS is set in the context's
 * enabled_traces mask.
 */
static ALWAYS_INLINE bool
u_trace_markers_enabled(struct u_trace_context *utctx)
{
   const unsigned enabled = p_atomic_read_relaxed(&utctx->enabled_traces);

   return (enabled & U_TRACE_TYPE_MARKERS) != 0;
}
#ifdef __cplusplus
}
#endif
#endif /* _U_TRACE_H */