From e539e9a7afacc330117a761e0ee55d6452cffaca Mon Sep 17 00:00:00 2001 From: Laszlo Agocs Date: Mon, 3 Apr 2023 19:18:59 +0200 Subject: rhi: Replace the temporary GPU time query API with a saner one Modeled after Metal's cb.GPUStart/EndTime. Implemented with timestamp queries for other APIs. Implemented for Metal, D3D11, Vulkan for now. No more callback, just a getter on the command buffer which returns the latest known value, referring to some previous frame. This makes it a lot more usable than the original solution that is not really used anywhere at the moment. Now works for offscreen "frames" as well, this was not implemented before. Opt in with a new QRhi::create() flag because we cannot tell in advance if the getter will be called or not, and this way we can skip recording the timestamps by default. The cost is probably minimal, though. Qt Quick will set this automatically when running with QSG_RHI_PROFILE=1. Change-Id: I903779984a4e0bbf1d03806d04bf61571ce23d72 Reviewed-by: Laszlo Agocs --- src/gui/rhi/qrhi.cpp | 101 ++++++++----- src/gui/rhi/qrhi_p.h | 8 +- src/gui/rhi/qrhi_p_p.h | 18 +-- src/gui/rhi/qrhid3d11.cpp | 213 +++++++++++++++++---------- src/gui/rhi/qrhid3d11_p_p.h | 28 +++- src/gui/rhi/qrhid3d12.cpp | 6 + src/gui/rhi/qrhid3d12_p_p.h | 1 + src/gui/rhi/qrhigles2.cpp | 6 + src/gui/rhi/qrhigles2_p_p.h | 1 + src/gui/rhi/qrhimetal.mm | 38 +++-- src/gui/rhi/qrhimetal_p_p.h | 3 +- src/gui/rhi/qrhinull.cpp | 6 + src/gui/rhi/qrhinull_p_p.h | 1 + src/gui/rhi/qrhivulkan.cpp | 142 ++++++++++++------ src/gui/rhi/qrhivulkan_p_p.h | 5 + tests/manual/rhi/offscreen/offscreen.cpp | 15 +- tests/manual/rhi/shared/examplefw.h | 2 +- tests/manual/rhi/triquadcube/triquadcube.cpp | 20 +-- 18 files changed, 404 insertions(+), 210 deletions(-) diff --git a/src/gui/rhi/qrhi.cpp b/src/gui/rhi/qrhi.cpp index 41b18e643e..4a2ca82d45 100644 --- a/src/gui/rhi/qrhi.cpp +++ b/src/gui/rhi/qrhi.cpp @@ -418,8 +418,15 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general") \value EnableDebugMarkers Enables debug marker groups. Without this frame debugging features like making debug groups and custom resource name visible in external GPU debugging tools will not be available and functions - like QRhiCommandBuffer::debugMarkBegin() will become a no-op. Avoid - enabling in production builds as it may involve a performance penalty. + like QRhiCommandBuffer::debugMarkBegin() will become no-ops. Avoid enabling + in production builds as it may involve a small performance impact. Has no + effect when the QRhi::DebugMarkers feature is not reported as supported. + + \value EnableTimestamps Enables GPU timestamp collection. When not set, + QRhiCommandBuffer::lastCompletedGpuTime() always returns 0. Enable this + only when needed since there may be a small amount of extra work involved + (e.g. timestamp queries), depending on the underlying graphics API. Has no + effect when the QRhi::Timestamps feature is not reported as supported. \value PreferSoftwareRenderer Indicates that backends should prefer choosing an adapter or physical device that renders in software on the CPU. @@ -490,8 +497,9 @@ Q_LOGGING_CATEGORY(QRHI_LOG_INFO, "qt.rhi.general") QRhiCommandBuffer::debugMarkBegin()) are supported. \value Timestamps Indicates that command buffer timestamps are supported. - Relevant for addGpuFrameTimeCallback(). Can be expected to be supported on - D3D11 and Vulkan, assuming the underlying implementation supports it. + Relevant for QRhiCommandBuffer::lastCompletedGpuTime(). Can be expected to + be supported on Metal, Vulkan, and Direct 3D, assuming the underlying + implementation supports timestamp queries or similar. \value Instancing Indicates that instanced drawing is supported. In practice this feature will be unsupported with OpenGL ES 2.0 and OpenGL @@ -4853,11 +4861,21 @@ QRhiResource::Type QRhiSwapChain::resourceType() const /*! \fn QRhiCommandBuffer *QRhiSwapChain::currentFrameCommandBuffer() - \return a command buffer on which rendering commands can be recorded. Only - valid within a QRhi::beginFrame() - QRhi::endFrame() block where - beginFrame() was called with this swapchain. - - \note the value must not be cached and reused between frames + \return a command buffer on which rendering commands and resource updates + can be recorded within a \l{QRhi::beginFrame()}{beginFrame} - + \l{QRhi::endFrame()}{endFrame} block, assuming beginFrame() was called with + this swapchain. + + \note The returned object is valid also after endFrame(), up until the next + beginFrame(), but the returned command buffer should not be used to record + any commands then. Rather, it can be used to query data collected during + the frame (or previous frames), for example by calling + \l{QRhiCommandBuffer::lastCompletedGpuTime()}{lastCompletedGpuTime()}. + + \note The value must not be cached and reused between frames. The caller + should not hold on to the returned object once + \l{QRhi::beginFrame()}{beginFrame()} is called again. Instead, the command + buffer object should be queried again by calling this function. */ /*! @@ -5857,36 +5875,6 @@ void QRhi::runCleanup() d->cleanupCallbacks.clear(); } -/*! - Registers a \a callback that is called with an elapsed time calculated from - GPU timestamps asynchronously after a timestamp becomes available at some - point after presenting a frame. - - The callback is called with a float value that is meant to be in - milliseconds and represents the elapsed time on the GPU side for a given - frame. Care must be exercised with the interpretation of the value, as what - it exactly is is not controlled by Qt and depends on the underlying - graphics API and its implementation. In particular, comparing the values - between different graphics APIs is discouraged and may be meaningless. - - The timing values become available asynchronously, sometimes several frames - after the frame has been submitted in endFrame(). There is currently no way - to identify the frame. The callback is invoked whenever the timestamp - queries complete. - - \note This is only supported when the Timestamp feature is reported as - supported from isFeatureSupported(). Otherwise the \a callback is never - called. - - The \a callback is always called on the thread the QRhi lives and operates - on. While not guaranteed, it is typical that the callback is invoked from - within beginFrame(). - */ -void QRhi::addGpuFrameTimeCallback(const GpuFrameTimeCallback &callback) -{ - d->addGpuFrameTimeCallback(callback); -} - /*! \class QRhiResourceUpdateBatch \internal @@ -6901,6 +6889,41 @@ void QRhiCommandBuffer::endExternal() m_rhi->endExternal(this); } +/*! + \return the last available timestamp, in seconds. The value indicates the + elapsed time on the GPU during the last completed frame. + + Care must be exercised with the interpretation of the value, as its + precision and granularity is often not controlled by Qt, and depends on the + underlying graphics API and its implementation. In particular, comparing + the values between different graphics APIs and hardware is discouraged and + may be meaningless. + + The timing values may become available asynchronously. The returned value + may therefore be 0 or the last known value referring to some previous + frame. The value my also become 0 again under certain conditions, such as + when resizing the window. It can be expected that the most up-to-date + available value is retrieved in beginFrame() and becomes queriable via this + function once beginFrame() returns. + + \note Do not assume that the value refers to the previous + (\c{currently_recorded - 1}) frame. It may refer to \c{currently_recorded - + 2} or \c{currently_recorded - 3} as well. The exact behavior may depend on + the graphics API and its implementation. + + \note The result is always 0 when the QRhi::Timestamps feature is not + reported as supported, or when QRhi::EnableTimestamps was not passed to + QRhi::create(). There are exceptions to the latter, because with some + graphics APIs timings are available without having to perform extra + operations, but portable applications should always consciously opt-in to + timestamp collection when they know it is needed, and call this function + accordingly. + */ +double QRhiCommandBuffer::lastCompletedGpuTime() +{ + return m_rhi->lastCompletedGpuTime(this); +} + /*! \return the value (typically an offset) \a v aligned to the uniform buffer alignment given by by ubufAlignment(). diff --git a/src/gui/rhi/qrhi_p.h b/src/gui/rhi/qrhi_p.h index 8f7fce18ea..4af1dc746f 100644 --- a/src/gui/rhi/qrhi_p.h +++ b/src/gui/rhi/qrhi_p.h @@ -1666,6 +1666,8 @@ public: void beginExternal(); void endExternal(); + double lastCompletedGpuTime(); + protected: QRhiCommandBuffer(QRhiImplementation *rhi); }; @@ -1768,7 +1770,8 @@ public: enum Flag { EnableDebugMarkers = 1 << 0, PreferSoftwareRenderer = 1 << 1, - EnablePipelineCacheDataSave = 1 << 2 + EnablePipelineCacheDataSave = 1 << 2, + EnableTimestamps = 1 << 3 }; Q_DECLARE_FLAGS(Flags, Flag) @@ -1866,9 +1869,6 @@ public: void addCleanupCallback(const CleanupCallback &callback); void runCleanup(); - using GpuFrameTimeCallback = std::function; - void addGpuFrameTimeCallback(const GpuFrameTimeCallback &callback); - QRhiGraphicsPipeline *newGraphicsPipeline(); QRhiComputePipeline *newComputePipeline(); QRhiShaderResourceBindings *newShaderResourceBindings(); diff --git a/src/gui/rhi/qrhi_p_p.h b/src/gui/rhi/qrhi_p_p.h index 6641bf5822..8b653c67ea 100644 --- a/src/gui/rhi/qrhi_p_p.h +++ b/src/gui/rhi/qrhi_p_p.h @@ -120,6 +120,7 @@ public: virtual const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) = 0; virtual void beginExternal(QRhiCommandBuffer *cb) = 0; virtual void endExternal(QRhiCommandBuffer *cb) = 0; + virtual double lastCompletedGpuTime(QRhiCommandBuffer *cb) = 0; virtual QList supportedSampleCounts() const = 0; virtual int ubufAlignment() const = 0; @@ -177,22 +178,6 @@ public: cleanupCallbacks.append(callback); } - void addGpuFrameTimeCallback(const QRhi::GpuFrameTimeCallback &callback) - { - gpuFrameTimeCallbacks.append(callback); - } - - bool hasGpuFrameTimeCallback() const - { - return !gpuFrameTimeCallbacks.isEmpty(); - } - - void runGpuFrameTimeCallbacks(float t) - { - for (const QRhi::GpuFrameTimeCallback &f : std::as_const(gpuFrameTimeCallbacks)) - f(t); - } - bool sanityCheckGraphicsPipeline(QRhiGraphicsPipeline *ps); bool sanityCheckShaderResourceBindings(QRhiShaderResourceBindings *srb); void updateLayoutDesc(QRhiShaderResourceBindings *srb); @@ -253,7 +238,6 @@ private: QHash resources; QSet pendingDeleteResources; QVarLengthArray cleanupCallbacks; - QVarLengthArray gpuFrameTimeCallbacks; QElapsedTimer pipelineCreationTimer; qint64 accumulatedPipelineCreationTime = 0; diff --git a/src/gui/rhi/qrhid3d11.cpp b/src/gui/rhi/qrhid3d11.cpp index a52754b0a7..d995358568 100644 --- a/src/gui/rhi/qrhid3d11.cpp +++ b/src/gui/rhi/qrhid3d11.cpp @@ -315,6 +315,11 @@ bool QRhiD3D11::create(QRhi::Flags flags) if (FAILED(context->QueryInterface(__uuidof(ID3DUserDefinedAnnotation), reinterpret_cast(&annotations)))) annotations = nullptr; + if (flags.testFlag(QRhi::EnableTimestamps)) { + ofr.timestamps.prepare(2, this); + // timestamp queries are optional so we can go on even if they failed + } + deviceLost = false; nativeHandlesStruct.dev = dev; @@ -340,6 +345,8 @@ void QRhiD3D11::destroy() clearShaderCache(); + ofr.timestamps.destroy(); + if (annotations) { annotations->Release(); annotations = nullptr; @@ -1235,6 +1242,12 @@ void QRhiD3D11::endExternal(QRhiCommandBuffer *cb) } } +double QRhiD3D11::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QD3D11CommandBuffer *cbD = QRHI_RES(QD3D11CommandBuffer, cb); + return cbD->lastGpuTime; +} + QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); @@ -1243,30 +1256,6 @@ QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF contextState.currentSwapChain = swapChainD; const int currentFrameSlot = swapChainD->currentFrameSlot; - if (swapChainD->timestampActive[currentFrameSlot]) { - ID3D11Query *tsDisjoint = swapChainD->timestampDisjointQuery[currentFrameSlot]; - const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot; - ID3D11Query *tsStart = swapChainD->timestampQuery[tsIdx]; - ID3D11Query *tsEnd = swapChainD->timestampQuery[tsIdx + 1]; - quint64 timestamps[2]; - D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; - bool ok = true; - ok &= context->GetData(tsDisjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; - ok &= context->GetData(tsEnd, ×tamps[1], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; - // this above is often not ready, not even in frame_where_recorded+2, - // not clear why. so make the whole thing async and do not touch the - // queries until they are finally all available in frame this+2 or - // this+4 or ... - ok &= context->GetData(tsStart, ×tamps[0], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; - if (ok) { - if (!dj.Disjoint && dj.Frequency) { - const float elapsedMs = (timestamps[1] - timestamps[0]) / float(dj.Frequency) * 1000.0f; - runGpuFrameTimeCallbacks(elapsedMs); - } - swapChainD->timestampActive[currentFrameSlot] = false; - } // else leave timestampActive set to true, will retry in a subsequent beginFrame - } - swapChainD->cb.resetState(); swapChainD->rt.d.rtv[0] = swapChainD->sampleDesc.Count > 1 ? @@ -1275,6 +1264,12 @@ QRhi::FrameOpResult QRhiD3D11::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF finishActiveReadbacks(); + if (swapChainD->timestamps.active[currentFrameSlot]) { + double elapsedSec = 0; + if (swapChainD->timestamps.tryQueryTimestamps(currentFrameSlot, context, &elapsedSec)) + swapChainD->cb.lastGpuTime = elapsedSec; + } + return QRhi::FrameOpSuccess; } @@ -1284,11 +1279,11 @@ QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame Q_ASSERT(contextState.currentSwapChain = swapChainD); const int currentFrameSlot = swapChainD->currentFrameSlot; - ID3D11Query *tsDisjoint = swapChainD->timestampDisjointQuery[currentFrameSlot]; + ID3D11Query *tsDisjoint = swapChainD->timestamps.disjointQuery[currentFrameSlot]; const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot; - ID3D11Query *tsStart = swapChainD->timestampQuery[tsIdx]; - ID3D11Query *tsEnd = swapChainD->timestampQuery[tsIdx + 1]; - const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !swapChainD->timestampActive[currentFrameSlot]; + ID3D11Query *tsStart = swapChainD->timestamps.query[tsIdx]; + ID3D11Query *tsEnd = swapChainD->timestamps.query[tsIdx + 1]; + const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !swapChainD->timestamps.active[currentFrameSlot]; // send all commands to the context if (recordTimestamps) @@ -1306,7 +1301,7 @@ QRhi::FrameOpResult QRhiD3D11::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame if (recordTimestamps) { context->End(tsEnd); context->End(tsDisjoint); - swapChainD->timestampActive[currentFrameSlot] = true; + swapChainD->timestamps.active[currentFrameSlot] = true; } if (!flags.testFlag(QRhi::SkipPresent)) { @@ -1347,6 +1342,12 @@ QRhi::FrameOpResult QRhiD3D11::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: ofr.cbWrapper.resetState(); *cb = &ofr.cbWrapper; + if (ofr.timestamps.active[ofr.timestampIdx]) { + double elapsedSec = 0; + if (ofr.timestamps.tryQueryTimestamps(ofr.timestampIdx, context, &elapsedSec)) + ofr.cbWrapper.lastGpuTime = elapsedSec; + } + return QRhi::FrameOpSuccess; } @@ -1355,10 +1356,27 @@ QRhi::FrameOpResult QRhiD3D11::endOffscreenFrame(QRhi::EndFrameFlags flags) Q_UNUSED(flags); ofr.active = false; + ID3D11Query *tsDisjoint = ofr.timestamps.disjointQuery[ofr.timestampIdx]; + ID3D11Query *tsStart = ofr.timestamps.query[ofr.timestampIdx * 2]; + ID3D11Query *tsEnd = ofr.timestamps.query[ofr.timestampIdx * 2 + 1]; + const bool recordTimestamps = tsDisjoint && tsStart && tsEnd && !ofr.timestamps.active[ofr.timestampIdx]; + if (recordTimestamps) { + context->Begin(tsDisjoint); + context->End(tsStart); // record timestamp; no Begin() for D3D11_QUERY_TIMESTAMP + } + executeCommandBuffer(&ofr.cbWrapper); + context->Flush(); finishActiveReadbacks(); + if (recordTimestamps) { + context->End(tsEnd); + context->End(tsDisjoint); + ofr.timestamps.active[ofr.timestampIdx] = true; + ofr.timestampIdx = (ofr.timestampIdx + 1) % 2; + } + return QRhi::FrameOpSuccess; } @@ -2607,10 +2625,10 @@ void QRhiD3D11::executeCommandBuffer(QD3D11CommandBuffer *cbD, QD3D11SwapChain * if (timestampSwapChain) { const int currentFrameSlot = timestampSwapChain->currentFrameSlot; - ID3D11Query *tsDisjoint = timestampSwapChain->timestampDisjointQuery[currentFrameSlot]; + ID3D11Query *tsDisjoint = timestampSwapChain->timestamps.disjointQuery[currentFrameSlot]; const int tsIdx = QD3D11SwapChain::BUFFER_COUNT * currentFrameSlot; - ID3D11Query *tsStart = timestampSwapChain->timestampQuery[tsIdx]; - if (tsDisjoint && tsStart && !timestampSwapChain->timestampActive[currentFrameSlot]) { + ID3D11Query *tsStart = timestampSwapChain->timestamps.query[tsIdx]; + if (tsDisjoint && tsStart && !timestampSwapChain->timestamps.active[currentFrameSlot]) { // The timestamps seem to include vsync time with Present(1), except // when running on a non-primary gpu. This is not ideal. So try working // it around by issuing a semi-fake OMSetRenderTargets early and @@ -4636,6 +4654,92 @@ void QD3D11CommandBuffer::destroy() // nothing to do here } +bool QD3D11Timestamps::prepare(int pairCount, QRhiD3D11 *rhiD) +{ + // Creates the query objects if not yet done, but otherwise calling this + // function is expected to be a no-op. + + Q_ASSERT(pairCount <= MAX_TIMESTAMP_PAIRS); + D3D11_QUERY_DESC queryDesc = {}; + for (int i = 0; i < pairCount; ++i) { + if (!disjointQuery[i]) { + queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; + HRESULT hr = rhiD->dev->CreateQuery(&queryDesc, &disjointQuery[i]); + if (FAILED(hr)) { + qWarning("Failed to create timestamp disjoint query: %s", + qPrintable(QSystemError::windowsComString(hr))); + return false; + } + } + queryDesc.Query = D3D11_QUERY_TIMESTAMP; + for (int j = 0; j < 2; ++j) { + const int idx = pairCount * i + j; + if (!query[idx]) { + HRESULT hr = rhiD->dev->CreateQuery(&queryDesc, &query[idx]); + if (FAILED(hr)) { + qWarning("Failed to create timestamp query: %s", + qPrintable(QSystemError::windowsComString(hr))); + return false; + } + } + } + } + this->pairCount = pairCount; + return true; +} + +void QD3D11Timestamps::destroy() +{ + for (int i = 0; i < MAX_TIMESTAMP_PAIRS; ++i) { + active[i] = false; + if (disjointQuery[i]) { + disjointQuery[i]->Release(); + disjointQuery[i] = nullptr; + } + for (int j = 0; j < 2; ++j) { + const int idx = MAX_TIMESTAMP_PAIRS * i + j; + if (query[idx]) { + query[idx]->Release(); + query[idx] = nullptr; + } + } + } +} + +bool QD3D11Timestamps::tryQueryTimestamps(int idx, ID3D11DeviceContext *context, double *elapsedSec) +{ + bool result = false; + if (!active[idx]) + return result; + + ID3D11Query *tsDisjoint = disjointQuery[idx]; + const int tsIdx = pairCount * idx; + ID3D11Query *tsStart = query[tsIdx]; + ID3D11Query *tsEnd = query[tsIdx + 1]; + quint64 timestamps[2]; + D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj; + + bool ok = true; + ok &= context->GetData(tsDisjoint, &dj, sizeof(dj), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; + ok &= context->GetData(tsEnd, ×tamps[1], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; + // this above is often not ready, not even in frame_where_recorded+2, + // not clear why. so make the whole thing async and do not touch the + // queries until they are finally all available in frame this+2 or + // this+4 or ... + ok &= context->GetData(tsStart, ×tamps[0], sizeof(quint64), D3D11_ASYNC_GETDATA_DONOTFLUSH) == S_OK; + + if (ok) { + if (!dj.Disjoint && dj.Frequency) { + const float elapsedMs = (timestamps[1] - timestamps[0]) / float(dj.Frequency) * 1000.0f; + *elapsedSec = elapsedMs / 1000.0; + result = true; + } + active[idx] = false; + } // else leave active set, will retry in a subsequent beginFrame or similar + + return result; +} + QD3D11SwapChain::QD3D11SwapChain(QRhiImplementation *rhi) : QRhiSwapChain(rhi), rt(rhi, this), @@ -4646,10 +4750,6 @@ QD3D11SwapChain::QD3D11SwapChain(QRhiImplementation *rhi) for (int i = 0; i < BUFFER_COUNT; ++i) { msaaTex[i] = nullptr; msaaRtv[i] = nullptr; - timestampActive[i] = false; - timestampDisjointQuery[i] = nullptr; - timestampQuery[2 * i] = nullptr; - timestampQuery[2 * i + 1] = nullptr; } } @@ -4687,19 +4787,7 @@ void QD3D11SwapChain::destroy() releaseBuffers(); - for (int i = 0; i < BUFFER_COUNT; ++i) { - if (timestampDisjointQuery[i]) { - timestampDisjointQuery[i]->Release(); - timestampDisjointQuery[i] = nullptr; - } - for (int j = 0; j < 2; ++j) { - const int idx = BUFFER_COUNT * i + j; - if (timestampQuery[idx]) { - timestampQuery[idx]->Release(); - timestampQuery[idx] = nullptr; - } - } - } + timestamps.destroy(); swapChain->Release(); swapChain = nullptr; @@ -5128,31 +5216,8 @@ bool QD3D11SwapChain::createOrResize() rtD->d.colorAttCount = 1; rtD->d.dsAttCount = m_depthStencil ? 1 : 0; - if (rhiD->hasGpuFrameTimeCallback()) { - D3D11_QUERY_DESC queryDesc = {}; - for (int i = 0; i < BUFFER_COUNT; ++i) { - if (!timestampDisjointQuery[i]) { - queryDesc.Query = D3D11_QUERY_TIMESTAMP_DISJOINT; - hr = rhiD->dev->CreateQuery(&queryDesc, ×tampDisjointQuery[i]); - if (FAILED(hr)) { - qWarning("Failed to create timestamp disjoint query: %s", - qPrintable(QSystemError::windowsComString(hr))); - break; - } - } - queryDesc.Query = D3D11_QUERY_TIMESTAMP; - for (int j = 0; j < 2; ++j) { - const int idx = BUFFER_COUNT * i + j; // one pair per buffer (frame) - if (!timestampQuery[idx]) { - hr = rhiD->dev->CreateQuery(&queryDesc, ×tampQuery[idx]); - if (FAILED(hr)) { - qWarning("Failed to create timestamp query: %s", - qPrintable(QSystemError::windowsComString(hr))); - break; - } - } - } - } + if (rhiD->rhiFlags.testFlag(QRhi::EnableTimestamps)) { + timestamps.prepare(BUFFER_COUNT, rhiD); // timestamp queries are optional so we can go on even if they failed } diff --git a/src/gui/rhi/qrhid3d11_p_p.h b/src/gui/rhi/qrhid3d11_p_p.h index b6f8b15e16..232afb696f 100644 --- a/src/gui/rhi/qrhid3d11_p_p.h +++ b/src/gui/rhi/qrhid3d11_p_p.h @@ -26,6 +26,8 @@ QT_BEGIN_NAMESPACE +class QRhiD3D11; + struct QD3D11Buffer : public QRhiBuffer { QD3D11Buffer(QRhiImplementation *rhi, Type type, UsageFlags usage, quint32 size); @@ -496,6 +498,7 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer QRhiBackendCommandList commands; PassType recordingPass; + double lastGpuTime = 0; QRhiRenderTarget *currentTarget; QRhiGraphicsPipeline *currentGraphicsPipeline; QRhiComputePipeline *currentComputePipeline; @@ -534,6 +537,7 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer } void resetState() { recordingPass = NoPass; + // do not zero lastGpuTime currentTarget = nullptr; resetCommands(); resetCachedState(); @@ -553,6 +557,21 @@ struct QD3D11CommandBuffer : public QRhiCommandBuffer } }; +static const int QD3D11_SWAPCHAIN_BUFFER_COUNT = 2; + +struct QD3D11Timestamps +{ + static const int MAX_TIMESTAMP_PAIRS = QD3D11_SWAPCHAIN_BUFFER_COUNT; + bool active[MAX_TIMESTAMP_PAIRS] = {}; + ID3D11Query *disjointQuery[MAX_TIMESTAMP_PAIRS] = {}; + ID3D11Query *query[MAX_TIMESTAMP_PAIRS * 2] = {}; + int pairCount = 0; + + bool prepare(int pairCount, QRhiD3D11 *rhiD); + void destroy(); + bool tryQueryTimestamps(int idx, ID3D11DeviceContext *context, double *elapsedSec); +}; + struct QD3D11SwapChain : public QRhiSwapChain { QD3D11SwapChain(QRhiImplementation *rhi); @@ -581,21 +600,19 @@ struct QD3D11SwapChain : public QRhiSwapChain DXGI_FORMAT srgbAdjustedColorFormat; IDXGISwapChain *swapChain = nullptr; UINT swapChainFlags = 0; - static const int BUFFER_COUNT = 2; ID3D11Texture2D *backBufferTex; ID3D11RenderTargetView *backBufferRtv; + static const int BUFFER_COUNT = QD3D11_SWAPCHAIN_BUFFER_COUNT; ID3D11Texture2D *msaaTex[BUFFER_COUNT]; ID3D11RenderTargetView *msaaRtv[BUFFER_COUNT]; DXGI_SAMPLE_DESC sampleDesc; int currentFrameSlot = 0; int frameCount = 0; QD3D11RenderBuffer *ds = nullptr; - bool timestampActive[BUFFER_COUNT]; - ID3D11Query *timestampDisjointQuery[BUFFER_COUNT]; - ID3D11Query *timestampQuery[BUFFER_COUNT * 2]; UINT swapInterval = 1; IDCompositionTarget *dcompTarget = nullptr; IDCompositionVisual *dcompVisual = nullptr; + QD3D11Timestamps timestamps; }; class QRhiD3D11 : public QRhiImplementation @@ -689,6 +706,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; @@ -761,6 +779,8 @@ public: OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { } bool active = false; QD3D11CommandBuffer cbWrapper; + QD3D11Timestamps timestamps; + int timestampIdx = 0; } ofr; struct TextureReadback { diff --git a/src/gui/rhi/qrhid3d12.cpp b/src/gui/rhi/qrhid3d12.cpp index 78f8339807..2b129b5742 100644 --- a/src/gui/rhi/qrhid3d12.cpp +++ b/src/gui/rhi/qrhid3d12.cpp @@ -1333,6 +1333,12 @@ void QRhiD3D12::endExternal(QRhiCommandBuffer *cb) } } +double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + Q_UNUSED(cb); + return 0; +} + QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); diff --git a/src/gui/rhi/qrhid3d12_p_p.h b/src/gui/rhi/qrhid3d12_p_p.h index dec217e771..d8ad22ce29 100644 --- a/src/gui/rhi/qrhid3d12_p_p.h +++ b/src/gui/rhi/qrhid3d12_p_p.h @@ -1088,6 +1088,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; diff --git a/src/gui/rhi/qrhigles2.cpp b/src/gui/rhi/qrhigles2.cpp index acdf1ec04c..4620dbd2ca 100644 --- a/src/gui/rhi/qrhigles2.cpp +++ b/src/gui/rhi/qrhigles2.cpp @@ -1962,6 +1962,12 @@ void QRhiGles2::endExternal(QRhiCommandBuffer *cb) enqueueBindFramebuffer(cbD->currentTarget, cbD); } +double QRhiGles2::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + Q_UNUSED(cb); + return 0; +} + QRhi::FrameOpResult QRhiGles2::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags) { QGles2SwapChain *swapChainD = QRHI_RES(QGles2SwapChain, swapChain); diff --git a/src/gui/rhi/qrhigles2_p_p.h b/src/gui/rhi/qrhigles2_p_p.h index a5de418820..f4cf7b98b6 100644 --- a/src/gui/rhi/qrhigles2_p_p.h +++ b/src/gui/rhi/qrhigles2_p_p.h @@ -806,6 +806,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; diff --git a/src/gui/rhi/qrhimetal.mm b/src/gui/rhi/qrhimetal.mm index b05b35f81c..20b28e9dc6 100644 --- a/src/gui/rhi/qrhimetal.mm +++ b/src/gui/rhi/qrhimetal.mm @@ -194,6 +194,7 @@ struct QRhiMetalData struct OffscreenFrame { OffscreenFrame(QRhiImplementation *rhi) : cbWrapper(rhi) { } bool active = false; + double lastGpuTime = 0; QMetalCommandBuffer cbWrapper; } ofr; @@ -296,6 +297,7 @@ struct QMetalShaderResourceBindingsData { struct QMetalCommandBufferData { id cb; + double lastGpuTime = 0; id currentRenderPassEncoder; id currentComputePassEncoder; id tessellationComputeEncoder; @@ -413,6 +415,7 @@ struct QMetalSwapChainData CAMetalLayer *layer = nullptr; id curDrawable = nil; dispatch_semaphore_t sem[QMTL_FRAMES_IN_FLIGHT]; + double lastGpuTime[QMTL_FRAMES_IN_FLIGHT]; MTLRenderPassDescriptor *rp = nullptr; id msaaTex[QMTL_FRAMES_IN_FLIGHT]; QRhiTexture::Format rhiColorFormat; @@ -727,7 +730,7 @@ bool QRhiMetal::isFeatureSupported(QRhi::Feature feature) const case QRhi::DebugMarkers: return true; case QRhi::Timestamps: - return false; + return true; case QRhi::Instancing: return true; case QRhi::CustomInstanceStepRate: @@ -2237,6 +2240,12 @@ void QRhiMetal::endExternal(QRhiCommandBuffer *cb) cbD->resetPerPassCachedState(); } +double QRhiMetal::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QMetalCommandBuffer *cbD = QRHI_RES(QMetalCommandBuffer, cb); + return cbD->d->lastGpuTime; +} + QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); @@ -2285,7 +2294,8 @@ QRhi::FrameOpResult QRhiMetal::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF swapChainD->ds->lastActiveFrameSlot = currentFrameSlot; executeDeferredReleases(); - swapChainD->cbWrapper.resetState(); + swapChainD->cbWrapper.resetState(swapChainD->d->lastGpuTime[currentFrameSlot]); + swapChainD->d->lastGpuTime[currentFrameSlot] = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -2297,7 +2307,8 @@ QRhi::FrameOpResult QRhiMetal::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame Q_ASSERT(currentSwapChain == swapChainD); __block int thisFrameSlot = currentFrameSlot; - [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id) { + [swapChainD->cbWrapper.d->cb addCompletedHandler: ^(id cb) { + swapChainD->d->lastGpuTime[thisFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; dispatch_semaphore_signal(swapChainD->d->sem[thisFrameSlot]); }]; @@ -2350,7 +2361,8 @@ QRhi::FrameOpResult QRhiMetal::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi: d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; executeDeferredReleases(); - d->ofr.cbWrapper.resetState(); + d->ofr.cbWrapper.resetState(d->ofr.lastGpuTime); + d->ofr.lastGpuTime = 0; finishActiveReadbacks(); return QRhi::FrameOpSuccess; @@ -2362,10 +2374,13 @@ QRhi::FrameOpResult QRhiMetal::endOffscreenFrame(QRhi::EndFrameFlags flags) Q_ASSERT(d->ofr.active); d->ofr.active = false; - [d->ofr.cbWrapper.d->cb commit]; + id cb = d->ofr.cbWrapper.d->cb; + [cb commit]; // offscreen frames wait for completion, unlike swapchain ones - [d->ofr.cbWrapper.d->cb waitUntilCompleted]; + [cb waitUntilCompleted]; + + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; finishActiveReadbacks(true); @@ -2406,10 +2421,13 @@ QRhi::FrameOpResult QRhiMetal::finish() } if (inFrame) { - if (d->ofr.active) + if (d->ofr.active) { + d->ofr.lastGpuTime += cb.GPUEndTime - cb.GPUStartTime; d->ofr.cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; - else + } else { + swapChainD->d->lastGpuTime[currentFrameSlot] += cb.GPUEndTime - cb.GPUStartTime; swapChainD->cbWrapper.d->cb = [d->cmdQueue commandBufferWithUnretainedReferences]; + } } executeDeferredReleases(true); @@ -5914,8 +5932,9 @@ const QRhiNativeHandles *QMetalCommandBuffer::nativeHandles() return &nativeHandlesStruct; } -void QMetalCommandBuffer::resetState() +void QMetalCommandBuffer::resetState(double lastGpuTime) { + d->lastGpuTime = lastGpuTime; d->currentRenderPassEncoder = nil; d->currentComputePassEncoder = nil; d->tessellationComputeEncoder = nil; @@ -6222,6 +6241,7 @@ bool QMetalSwapChain::createOrResize() d->curDrawable = nil; for (int i = 0; i < QMTL_FRAMES_IN_FLIGHT; ++i) { + d->lastGpuTime[i] = 0; if (!d->sem[i]) d->sem[i] = dispatch_semaphore_create(QMTL_FRAMES_IN_FLIGHT - 1); } diff --git a/src/gui/rhi/qrhimetal_p_p.h b/src/gui/rhi/qrhimetal_p_p.h index c76a29b0a8..9f60923851 100644 --- a/src/gui/rhi/qrhimetal_p_p.h +++ b/src/gui/rhi/qrhimetal_p_p.h @@ -283,7 +283,7 @@ struct QMetalCommandBuffer : public QRhiCommandBuffer QPair currentDepthBiasValues; const QRhiNativeHandles *nativeHandles(); - void resetState(); + void resetState(double lastGpuTime = 0); void resetPerPassState(); void resetPerPassCachedState(); }; @@ -418,6 +418,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; diff --git a/src/gui/rhi/qrhinull.cpp b/src/gui/rhi/qrhinull.cpp index 40870aab6a..6100492457 100644 --- a/src/gui/rhi/qrhinull.cpp +++ b/src/gui/rhi/qrhinull.cpp @@ -344,6 +344,12 @@ void QRhiNull::endExternal(QRhiCommandBuffer *cb) Q_UNUSED(cb); } +double QRhiNull::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + Q_UNUSED(cb); + return 0; +} + QRhi::FrameOpResult QRhiNull::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags) { Q_UNUSED(flags); diff --git a/src/gui/rhi/qrhinull_p_p.h b/src/gui/rhi/qrhinull_p_p.h index 2ace5a36b6..152622d117 100644 --- a/src/gui/rhi/qrhinull_p_p.h +++ b/src/gui/rhi/qrhinull_p_p.h @@ -261,6 +261,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; diff --git a/src/gui/rhi/qrhivulkan.cpp b/src/gui/rhi/qrhivulkan.cpp index b2f72dce49..17e291eee1 100644 --- a/src/gui/rhi/qrhivulkan.cpp +++ b/src/gui/rhi/qrhivulkan.cpp @@ -1671,6 +1671,24 @@ void QRhiVulkan::ensureCommandPoolForNewFrame() df->vkResetCommandPool(dev, cmdPool[currentFrameSlot], flags); } +double QRhiVulkan::elapsedSecondsFromTimestamp(quint64 timestamp[2], bool *ok) +{ + quint64 mask = 0; + for (quint64 i = 0; i < timestampValidBits; i += 8) + mask |= 0xFFULL << i; + const quint64 ts0 = timestamp[0] & mask; + const quint64 ts1 = timestamp[1] & mask; + const float nsecsPerTick = physDevProperties.limits.timestampPeriod; + if (!qFuzzyIsNull(nsecsPerTick)) { + const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f; + const double elapsedSec = elapsedMs / 1000.0; + *ok = true; + return elapsedSec; + } + *ok = false; + return 0; +} + QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags) { QVkSwapChain *swapChainD = QRHI_RES(QVkSwapChain, swapChain); @@ -1720,30 +1738,6 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin // mess up A's in-flight commands (as they are not in flight anymore). waitCommandCompletion(frameResIndex); - // Now is the time to read the timestamps for the previous frame for this slot. - if (frame.timestampQueryIndex >= 0) { - quint64 timestamp[2] = { 0, 0 }; - VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2, - 2 * sizeof(quint64), timestamp, sizeof(quint64), - VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); - timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2); - frame.timestampQueryIndex = -1; - if (err == VK_SUCCESS) { - quint64 mask = 0; - for (quint64 i = 0; i < timestampValidBits; i += 8) - mask |= 0xFFULL << i; - const quint64 ts0 = timestamp[0] & mask; - const quint64 ts1 = timestamp[1] & mask; - const float nsecsPerTick = physDevProperties.limits.timestampPeriod; - if (!qFuzzyIsNull(nsecsPerTick)) { - const float elapsedMs = float(ts1 - ts0) * nsecsPerTick / 1000000.0f; - runGpuFrameTimeCallbacks(elapsedMs); - } - } else { - qWarning("Failed to query timestamp: %d", err); - } - } - currentFrameSlot = int(swapChainD->currentFrameSlot); currentSwapChain = swapChainD; if (swapChainD->ds) @@ -1757,9 +1751,34 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin if (cbres != QRhi::FrameOpSuccess) return cbres; - // when profiling is enabled, pick a free query (pair) from the pool - int timestampQueryIdx = -1; - if (hasGpuFrameTimeCallback() && swapChainD->bufferCount > 1) { // no timestamps if not having at least 2 frames in flight + swapChainD->cbWrapper.cb = frame.cmdBuf; + + QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]); + swapChainD->rtWrapper.d.fb = image.fb; + + prepareNewFrame(&swapChainD->cbWrapper); + + // Read the timestamps for the previous frame for this slot. + if (frame.timestampQueryIndex >= 0) { + quint64 timestamp[2] = { 0, 0 }; + VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(frame.timestampQueryIndex), 2, + 2 * sizeof(quint64), timestamp, sizeof(quint64), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + timestampQueryPoolMap.clearBit(frame.timestampQueryIndex / 2); + frame.timestampQueryIndex = -1; + if (err == VK_SUCCESS) { + bool ok = false; + const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok); + if (ok) + swapChainD->cbWrapper.lastGpuTime = elapsedSec; + } else { + qWarning("Failed to query timestamp: %d", err); + } + } + + // No timestamps if the client did not opt in, or when not having at least 2 frames in flight. + if (rhiFlags.testFlag(QRhi::EnableTimestamps) && swapChainD->bufferCount > 1) { + int timestampQueryIdx = -1; for (int i = 0; i < timestampQueryPoolMap.size(); ++i) { if (!timestampQueryPoolMap.testBit(i)) { timestampQueryPoolMap.setBit(i); @@ -1767,21 +1786,14 @@ QRhi::FrameOpResult QRhiVulkan::beginFrame(QRhiSwapChain *swapChain, QRhi::Begin break; } } + if (timestampQueryIdx >= 0) { + df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2); + // record timestamp at the start of the command buffer + df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + timestampQueryPool, uint32_t(timestampQueryIdx)); + frame.timestampQueryIndex = timestampQueryIdx; + } } - if (timestampQueryIdx >= 0) { - df->vkCmdResetQueryPool(frame.cmdBuf, timestampQueryPool, uint32_t(timestampQueryIdx), 2); - // record timestamp at the start of the command buffer - df->vkCmdWriteTimestamp(frame.cmdBuf, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, - timestampQueryPool, uint32_t(timestampQueryIdx)); - frame.timestampQueryIndex = timestampQueryIdx; - } - - swapChainD->cbWrapper.cb = frame.cmdBuf; - - QVkSwapChain::ImageResources &image(swapChainD->imageRes[swapChainD->currentImageIndex]); - swapChainD->rtWrapper.d.fb = image.fb; - - prepareNewFrame(&swapChainD->cbWrapper); return QRhi::FrameOpSuccess; } @@ -2031,6 +2043,24 @@ QRhi::FrameOpResult QRhiVulkan::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi prepareNewFrame(cbWrapper); ofr.active = true; + if (rhiFlags.testFlag(QRhi::EnableTimestamps)) { + int timestampQueryIdx = -1; + for (int i = 0; i < timestampQueryPoolMap.size(); ++i) { + if (!timestampQueryPoolMap.testBit(i)) { + timestampQueryPoolMap.setBit(i); + timestampQueryIdx = i * 2; + break; + } + } + if (timestampQueryIdx >= 0) { + df->vkCmdResetQueryPool(cbWrapper->cb, timestampQueryPool, uint32_t(timestampQueryIdx), 2); + // record timestamp at the start of the command buffer + df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, + timestampQueryPool, uint32_t(timestampQueryIdx)); + ofr.timestampQueryIndex = timestampQueryIdx; + } + } + *cb = cbWrapper; return QRhi::FrameOpSuccess; } @@ -2044,6 +2074,12 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags) QVkCommandBuffer *cbWrapper(ofr.cbWrapper[currentFrameSlot]); recordPrimaryCommandBuffer(cbWrapper); + // record another timestamp, when enabled + if (ofr.timestampQueryIndex >= 0) { + df->vkCmdWriteTimestamp(cbWrapper->cb, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, + timestampQueryPool, uint32_t(ofr.timestampQueryIndex + 1)); + } + if (!ofr.cmdFence) { VkFenceCreateInfo fenceInfo = {}; fenceInfo.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO; @@ -2066,6 +2102,24 @@ QRhi::FrameOpResult QRhiVulkan::endOffscreenFrame(QRhi::EndFrameFlags flags) // previous) frame is safe since we waited for completion above. finishActiveReadbacks(true); + // Read the timestamps, if we wrote them. + if (ofr.timestampQueryIndex >= 0) { + quint64 timestamp[2] = { 0, 0 }; + VkResult err = df->vkGetQueryPoolResults(dev, timestampQueryPool, uint32_t(ofr.timestampQueryIndex), 2, + 2 * sizeof(quint64), timestamp, sizeof(quint64), + VK_QUERY_RESULT_64_BIT | VK_QUERY_RESULT_WAIT_BIT); + timestampQueryPoolMap.clearBit(ofr.timestampQueryIndex / 2); + ofr.timestampQueryIndex = -1; + if (err == VK_SUCCESS) { + bool ok = false; + const double elapsedSec = elapsedSecondsFromTimestamp(timestamp, &ok); + if (ok) + cbWrapper->lastGpuTime = elapsedSec; + } else { + qWarning("Failed to query timestamp: %d", err); + } + } + return QRhi::FrameOpSuccess; } @@ -5153,6 +5207,12 @@ void QRhiVulkan::endExternal(QRhiCommandBuffer *cb) cbD->resetCachedState(); } +double QRhiVulkan::lastCompletedGpuTime(QRhiCommandBuffer *cb) +{ + QVkCommandBuffer *cbD = QRHI_RES(QVkCommandBuffer, cb); + return cbD->lastGpuTime; +} + void QRhiVulkan::setObjectName(uint64_t object, VkObjectType type, const QByteArray &name, int slot) { #ifdef VK_EXT_debug_utils diff --git a/src/gui/rhi/qrhivulkan_p_p.h b/src/gui/rhi/qrhivulkan_p_p.h index 8999ea9580..69bd6df305 100644 --- a/src/gui/rhi/qrhivulkan_p_p.h +++ b/src/gui/rhi/qrhivulkan_p_p.h @@ -320,6 +320,7 @@ struct QVkCommandBuffer : public QRhiCommandBuffer void resetState() { recordingPass = NoPass; passUsesSecondaryCb = false; + lastGpuTime = 0; currentTarget = nullptr; activeSecondaryCbStack.clear(); resetCommands(); @@ -344,6 +345,7 @@ struct QVkCommandBuffer : public QRhiCommandBuffer PassType recordingPass; bool passUsesSecondaryCb; + double lastGpuTime = 0; QRhiRenderTarget *currentTarget; QRhiGraphicsPipeline *currentGraphicsPipeline; QRhiComputePipeline *currentComputePipeline; @@ -722,6 +724,7 @@ public: const QRhiNativeHandles *nativeHandles(QRhiCommandBuffer *cb) override; void beginExternal(QRhiCommandBuffer *cb) override; void endExternal(QRhiCommandBuffer *cb) override; + double lastCompletedGpuTime(QRhiCommandBuffer *cb) override; QList supportedSampleCounts() const override; int ubufAlignment() const override; @@ -813,6 +816,7 @@ public: int startLevel, int levelCount); void updateShaderResourceBindings(QRhiShaderResourceBindings *srb, int descSetIdx = -1); void ensureCommandPoolForNewFrame(); + double elapsedSecondsFromTimestamp(quint64 timestamp[2], bool *ok); QVulkanInstance *inst = nullptr; QWindow *maybeWindow = nullptr; @@ -903,6 +907,7 @@ public: bool active = false; QVkCommandBuffer *cbWrapper[QVK_FRAMES_IN_FLIGHT]; VkFence cmdFence = VK_NULL_HANDLE; + int timestampQueryIndex = -1; } ofr; struct TextureReadback { diff --git a/tests/manual/rhi/offscreen/offscreen.cpp b/tests/manual/rhi/offscreen/offscreen.cpp index 244e809156..1334fe3fb6 100644 --- a/tests/manual/rhi/offscreen/offscreen.cpp +++ b/tests/manual/rhi/offscreen/offscreen.cpp @@ -126,10 +126,11 @@ int main(int argc, char **argv) qDebug("This is a multi-api example, use command line arguments to override:\n%s", qPrintable(cmdLineParser.helpText())); QRhi *r = nullptr; + QRhi::Flags rhiFlags = QRhi::EnableTimestamps; if (graphicsApi == Null) { QRhiNullInitParams params; - r = QRhi::create(QRhi::Null, ¶ms); + r = QRhi::create(QRhi::Null, ¶ms, rhiFlags); } #if QT_CONFIG(vulkan) @@ -141,7 +142,7 @@ int main(int argc, char **argv) if (inst.create()) { QRhiVulkanInitParams params; params.inst = &inst; - r = QRhi::create(QRhi::Vulkan, ¶ms); + r = QRhi::create(QRhi::Vulkan, ¶ms, rhiFlags); } else { qWarning("Failed to create Vulkan instance, switching to OpenGL"); graphicsApi = OpenGL; @@ -155,7 +156,7 @@ int main(int argc, char **argv) offscreenSurface.reset(QRhiGles2InitParams::newFallbackSurface()); QRhiGles2InitParams params; params.fallbackSurface = offscreenSurface.data(); - r = QRhi::create(QRhi::OpenGLES2, ¶ms); + r = QRhi::create(QRhi::OpenGLES2, ¶ms, rhiFlags); } #endif @@ -163,18 +164,18 @@ int main(int argc, char **argv) if (graphicsApi == D3D11) { QRhiD3D11InitParams params; params.enableDebugLayer = true; - r = QRhi::create(QRhi::D3D11, ¶ms); + r = QRhi::create(QRhi::D3D11, ¶ms, rhiFlags); } else if (graphicsApi == D3D12) { QRhiD3D12InitParams params; params.enableDebugLayer = true; - r = QRhi::create(QRhi::D3D12, ¶ms); + r = QRhi::create(QRhi::D3D12, ¶ms, rhiFlags); } #endif #if defined(Q_OS_MACOS) || defined(Q_OS_IOS) if (graphicsApi == Metal) { QRhiMetalInitParams params; - r = QRhi::create(QRhi::Metal, ¶ms); + r = QRhi::create(QRhi::Metal, ¶ms, rhiFlags); } #endif @@ -301,6 +302,8 @@ int main(int argc, char **argv) #ifdef TEST_FINISH r->endOffscreenFrame(); #endif + if (r->isFeatureSupported(QRhi::Timestamps)) + qDebug() << "GPU time:" << cb->lastCompletedGpuTime() << "seconds (may refer to a previous frame)"; } delete ps; diff --git a/tests/manual/rhi/shared/examplefw.h b/tests/manual/rhi/shared/examplefw.h index 9879e8c5ba..75f48aa0c7 100644 --- a/tests/manual/rhi/shared/examplefw.h +++ b/tests/manual/rhi/shared/examplefw.h @@ -92,7 +92,7 @@ QString graphicsApiName() return QString(); } -QRhi::Flags rhiFlags = QRhi::EnableDebugMarkers; +QRhi::Flags rhiFlags = QRhi::EnableDebugMarkers | QRhi::EnableTimestamps; int sampleCount = 1; QRhiSwapChain::Flags scFlags; QRhi::BeginFrameFlags beginFrameFlags; diff --git a/tests/manual/rhi/triquadcube/triquadcube.cpp b/tests/manual/rhi/triquadcube/triquadcube.cpp index 81e6247680..3be0291779 100644 --- a/tests/manual/rhi/triquadcube/triquadcube.cpp +++ b/tests/manual/rhi/triquadcube/triquadcube.cpp @@ -35,7 +35,6 @@ struct { QSize lastOutputSize; int frameCount = 0; QFile profOut; - QVarLengthArray gpuFrameTimes; QElapsedTimer gpuFrameTimePrintTimer; } d; @@ -136,20 +135,8 @@ void Window::customInit() // With Vulkan at least we should see some details from the memory allocator. qDebug() << m_r->statistics(); - // Every two seconds try printing an average of the gpu frame times. + // Every two seconds try printing last known gpu frame time. d.gpuFrameTimePrintTimer.start(); - m_r->addGpuFrameTimeCallback([](float elapsedMs) { - d.gpuFrameTimes.append(elapsedMs); - if (d.gpuFrameTimePrintTimer.elapsed() > 2000) { - float at = 0.0f; - for (float t : d.gpuFrameTimes) - at += t; - at /= d.gpuFrameTimes.count(); - qDebug() << "Average GPU frame time" << at; - d.gpuFrameTimes.clear(); - d.gpuFrameTimePrintTimer.restart(); - } - }); } void Window::customRelease() @@ -170,6 +157,11 @@ void Window::customRender() const QSize outputSize = m_sc->currentPixelSize(); QRhiCommandBuffer *cb = m_sc->currentFrameCommandBuffer(); + if (d.gpuFrameTimePrintTimer.elapsed() > 2000) { + qDebug() << "Last completed GPU frame time" << cb->lastCompletedGpuTime() << "seconds"; + d.gpuFrameTimePrintTimer.restart(); + } + if (outputSize != d.lastOutputSize) { d.triRenderer.resize(outputSize); if (!d.triangleOnly) { -- cgit v1.2.1