summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorLaszlo Agocs <laszlo.agocs@qt.io>2023-07-17 12:28:13 +0200
committerLaszlo Agocs <laszlo.agocs@qt.io>2023-07-31 12:38:40 +0200
commiteb99d19cc4346ac92f7876e94f245f786987391b (patch)
tree7f58a5b2cad76e071109ffc1b2a27b9607f70948 /src
parent95ed8d1fd9ccc5270b825550085c3a3df2830d5c (diff)
rhi: d3d12: Implement lastCompletedGpuTime via timestamp queries
Change-Id: I5f2588268cf4d52025f9b1c8d94cdcd9a742531c Reviewed-by: Andy Nichols <andy.nichols@qt.io> Reviewed-by: Qt CI Bot <qt_ci_bot@qt-project.org>
Diffstat (limited to 'src')
-rw-r--r--src/gui/rhi/qrhid3d12.cpp151
-rw-r--r--src/gui/rhi/qrhid3d12_p.h20
2 files changed, 168 insertions, 3 deletions
diff --git a/src/gui/rhi/qrhid3d12.cpp b/src/gui/rhi/qrhid3d12.cpp
index cbeef5c2cb0..0b5d7e38751 100644
--- a/src/gui/rhi/qrhid3d12.cpp
+++ b/src/gui/rhi/qrhid3d12.cpp
@@ -397,6 +397,9 @@ bool QRhiD3D12::create(QRhi::Flags flags)
qWarning("Could not create host-visible staging area");
return false;
}
+ QString decoratedName = QLatin1String("Small staging area buffer/");
+ decoratedName += QString::number(i);
+ smallStagingAreas[i].mem.buffer->SetName(reinterpret_cast<LPCWSTR>(decoratedName.utf16()));
}
if (!shaderVisibleCbvSrvUavHeap.create(dev,
@@ -407,6 +410,45 @@ bool QRhiD3D12::create(QRhi::Flags flags)
return false;
}
+ if (flags.testFlag(QRhi::EnableTimestamps)) {
+ static bool wantsStablePowerState = qEnvironmentVariableIntValue("QT_D3D_STABLE_POWER_STATE");
+ //
+ // https://learn.microsoft.com/en-us/windows/win32/api/d3d12/nf-d3d12-id3d12device-setstablepowerstate
+ //
+ // NB! This is a _global_ setting, affecting other processes (and 3D
+ // APIs such as Vulkan), as long as this application is running. Hence
+ // making it an env.var. for now. Never enable it in production. But
+ // extremely useful for the GPU timings with NVIDIA at least; the
+ // timestamps become stable and smooth, making the number readable and
+ // actually useful e.g. in Quick 3D's DebugView when this is enabled.
+ // (otherwise the number's all over the place)
+ //
+ // See also
+ // https://developer.nvidia.com/blog/advanced-api-performance-setstablepowerstate/
+ // for possible other approaches.
+ //
+ if (wantsStablePowerState)
+ dev->SetStablePowerState(TRUE);
+
+ hr = cmdQueue->GetTimestampFrequency(&timestampTicksPerSecond);
+ if (FAILED(hr)) {
+ qWarning("Failed to query timestamp frequency: %s",
+ qPrintable(QSystemError::windowsComString(hr)));
+ return false;
+ }
+ if (!timestampQueryHeap.create(dev, QD3D12_FRAMES_IN_FLIGHT * 2, D3D12_QUERY_HEAP_TYPE_TIMESTAMP)) {
+ qWarning("Failed to create timestamp query pool");
+ return false;
+ }
+ const quint32 readbackBufSize = QD3D12_FRAMES_IN_FLIGHT * 2 * sizeof(quint64);
+ if (!timestampReadbackArea.create(this, readbackBufSize, D3D12_HEAP_TYPE_READBACK)) {
+ qWarning("Failed to create timestamp readback buffer");
+ return false;
+ }
+ timestampReadbackArea.mem.buffer->SetName(L"Timestamp readback buffer");
+ memset(timestampReadbackArea.mem.p, 0, readbackBufSize);
+ }
+
D3D12_FEATURE_DATA_D3D12_OPTIONS3 options3 = {};
if (SUCCEEDED(dev->CheckFeatureSupport(D3D12_FEATURE_D3D12_OPTIONS3, &options3, sizeof(options3))))
caps.multiView = options3.ViewInstancingTier != D3D12_VIEW_INSTANCING_TIER_NOT_SUPPORTED;
@@ -439,6 +481,9 @@ void QRhiD3D12::destroy()
}
}
+ timestampQueryHeap.destroy();
+ timestampReadbackArea.destroy();
+
shaderVisibleCbvSrvUavHeap.destroy();
for (int i = 0; i < QD3D12_FRAMES_IN_FLIGHT; ++i)
@@ -574,7 +619,7 @@ bool QRhiD3D12::isFeatureSupported(QRhi::Feature feature) const
return false;
#endif
case QRhi::Timestamps:
- return false; // ###
+ return true;
case QRhi::Instancing:
return true;
case QRhi::CustomInstanceStepRate:
@@ -1389,8 +1434,24 @@ void QRhiD3D12::endExternal(QRhiCommandBuffer *cb)
double QRhiD3D12::lastCompletedGpuTime(QRhiCommandBuffer *cb)
{
- Q_UNUSED(cb);
- return 0;
+ QD3D12CommandBuffer *cbD = QRHI_RES(QD3D12CommandBuffer, cb);
+ return cbD->lastGpuTime;
+}
+
+static void calculateGpuTime(QD3D12CommandBuffer *cbD,
+ int timestampPairStartIndex,
+ const quint8 *readbackBufPtr,
+ quint64 timestampTicksPerSecond)
+{
+ const size_t byteOffset = timestampPairStartIndex * sizeof(quint64);
+ const quint64 *p = reinterpret_cast<const quint64 *>(readbackBufPtr + byteOffset);
+ const quint64 startTime = *p++;
+ const quint64 endTime = *p;
+ if (startTime < endTime) {
+ const quint64 ticks = endTime - startTime;
+ const double timeSec = ticks / double(timestampTicksPerSecond);
+ cbD->lastGpuTime = timeSec;
+ }
}
QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginFrameFlags flags)
@@ -1453,6 +1514,20 @@ QRhi::FrameOpResult QRhiD3D12::beginFrame(QRhiSwapChain *swapChain, QRhi::BeginF
finishActiveReadbacks(); // last, in case the readback-completed callback issues rhi calls
+ if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
+ // Read the timestamps for the previous frame for this slot. (the
+ // ResolveQuery() should have completed by now due to the wait above)
+ const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
+ calculateGpuTime(cbD,
+ timestampPairStartIndex,
+ timestampReadbackArea.mem.p,
+ timestampTicksPerSecond);
+ // Write the start timestamp for this frame for this slot.
+ cbD->cmdList->EndQuery(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ timestampPairStartIndex);
+ }
+
return QRhi::FrameOpSuccess;
}
@@ -1477,6 +1552,19 @@ QRhi::FrameOpResult QRhiD3D12::endFrame(QRhiSwapChain *swapChain, QRhi::EndFrame
barrierGen.addTransitionBarrier(backBufferResourceHandle, D3D12_RESOURCE_STATE_PRESENT);
barrierGen.enqueueBufferedTransitionBarriers(cbD);
+ if (timestampQueryHeap.isValid()) {
+ const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
+ cbD->cmdList->EndQuery(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ timestampPairStartIndex + 1);
+ cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ timestampPairStartIndex,
+ 2,
+ timestampReadbackArea.mem.buffer,
+ timestampPairStartIndex * sizeof(quint64));
+ }
+
ID3D12GraphicsCommandList1 *cmdList = cbD->cmdList;
HRESULT hr = cmdList->Close();
if (FAILED(hr)) {
@@ -1561,6 +1649,12 @@ QRhi::FrameOpResult QRhiD3D12::beginOffscreenFrame(QRhiCommandBuffer **cb, QRhi:
bindShaderVisibleHeaps(cbD);
+ if (timestampQueryHeap.isValid() && timestampTicksPerSecond) {
+ cbD->cmdList->EndQuery(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT);
+ }
+
offscreenActive = true;
*cb = cbD;
@@ -1574,6 +1668,19 @@ QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags)
offscreenActive = false;
QD3D12CommandBuffer *cbD = offscreenCb[currentFrameSlot];
+ if (timestampQueryHeap.isValid()) {
+ const int timestampPairStartIndex = currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT;
+ cbD->cmdList->EndQuery(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ timestampPairStartIndex + 1);
+ cbD->cmdList->ResolveQueryData(timestampQueryHeap.heap,
+ D3D12_QUERY_TYPE_TIMESTAMP,
+ timestampPairStartIndex,
+ 2,
+ timestampReadbackArea.mem.buffer,
+ timestampPairStartIndex * sizeof(quint64));
+ }
+
ID3D12GraphicsCommandList1 *cmdList = cbD->cmdList;
HRESULT hr = cmdList->Close();
if (FAILED(hr)) {
@@ -1594,6 +1701,14 @@ QRhi::FrameOpResult QRhiD3D12::endOffscreenFrame(QRhi::EndFrameFlags flags)
// previous) frame is safe since we waited for completion above.
finishActiveReadbacks(true);
+ // the timestamp query results should be available too, given the wait
+ if (timestampQueryHeap.isValid()) {
+ calculateGpuTime(cbD,
+ currentFrameSlot * QD3D12_FRAMES_IN_FLIGHT,
+ timestampReadbackArea.mem.p,
+ timestampTicksPerSecond);
+ }
+
return QRhi::FrameOpSuccess;
}
@@ -2058,6 +2173,36 @@ void QD3D12CpuDescriptorPool::release(const QD3D12Descriptor &descriptor, quint3
quint64(descriptor.cpuHandle.ptr));
}
+bool QD3D12QueryHeap::create(ID3D12Device *device,
+ quint32 queryCount,
+ D3D12_QUERY_HEAP_TYPE heapType)
+{
+ capacity = queryCount;
+
+ D3D12_QUERY_HEAP_DESC heapDesc = {};
+ heapDesc.Type = heapType;
+ heapDesc.Count = capacity;
+
+ HRESULT hr = device->CreateQueryHeap(&heapDesc, __uuidof(ID3D12QueryHeap), reinterpret_cast<void **>(&heap));
+ if (FAILED(hr)) {
+ qWarning("Failed to create query heap: %s", qPrintable(QSystemError::windowsComString(hr)));
+ heap = nullptr;
+ capacity = 0;
+ return false;
+ }
+
+ return true;
+}
+
+void QD3D12QueryHeap::destroy()
+{
+ if (heap) {
+ heap->Release();
+ heap = nullptr;
+ }
+ capacity = 0;
+}
+
bool QD3D12StagingArea::create(QRhiD3D12 *rhi, quint32 capacity, D3D12_HEAP_TYPE heapType)
{
Q_ASSERT(heapType == D3D12_HEAP_TYPE_UPLOAD || heapType == D3D12_HEAP_TYPE_READBACK);
diff --git a/src/gui/rhi/qrhid3d12_p.h b/src/gui/rhi/qrhid3d12_p.h
index bfc2f530dbd..5f9253c7a75 100644
--- a/src/gui/rhi/qrhid3d12_p.h
+++ b/src/gui/rhi/qrhid3d12_p.h
@@ -119,6 +119,18 @@ struct QD3D12CpuDescriptorPool
const char *debugName;
};
+struct QD3D12QueryHeap
+{
+ bool isValid() const { return heap && capacity; }
+ bool create(ID3D12Device *device,
+ quint32 queryCount,
+ D3D12_QUERY_HEAP_TYPE heapType);
+ void destroy();
+
+ ID3D12QueryHeap *heap = nullptr;
+ quint32 capacity = 0;
+};
+
struct QD3D12StagingArea
{
static const quint32 ALIGNMENT = D3D12_TEXTURE_DATA_PLACEMENT_ALIGNMENT; // 512 so good enough both for cb and texdata
@@ -931,9 +943,11 @@ struct QD3D12CommandBuffer : public QRhiCommandBuffer
currentVertexOffsets = {};
}
+ // per-frame
PassType recordingPass;
QRhiRenderTarget *currentTarget;
+ // per-pass
QD3D12GraphicsPipeline *currentGraphicsPipeline;
QD3D12ComputePipeline *currentComputePipeline;
uint currentPipelineGeneration;
@@ -945,6 +959,9 @@ struct QD3D12CommandBuffer : public QRhiCommandBuffer
DXGI_FORMAT currentIndexFormat;
std::array<QD3D12ObjectHandle, D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT> currentVertexBuffers;
std::array<quint32, D3D12_IA_VERTEX_INPUT_RESOURCE_SLOT_COUNT> currentVertexOffsets;
+
+ // global
+ double lastGpuTime = 0;
};
struct QD3D12SwapChain : public QRhiSwapChain
@@ -1169,6 +1186,9 @@ public:
QD3D12MipmapGenerator mipmapGen;
QD3D12StagingArea smallStagingAreas[QD3D12_FRAMES_IN_FLIGHT];
QD3D12ShaderVisibleDescriptorHeap shaderVisibleCbvSrvUavHeap;
+ UINT64 timestampTicksPerSecond = 0;
+ QD3D12QueryHeap timestampQueryHeap;
+ QD3D12StagingArea timestampReadbackArea;
IDCompositionDevice *dcompDevice = nullptr;
QD3D12SwapChain *currentSwapChain = nullptr;
QSet<QD3D12SwapChain *> swapchains;