diff --git a/renderdoc/driver/vulkan/vk_core.cpp b/renderdoc/driver/vulkan/vk_core.cpp index 5c68a31069..ed21ff4f5b 100644 --- a/renderdoc/driver/vulkan/vk_core.cpp +++ b/renderdoc/driver/vulkan/vk_core.cpp @@ -705,6 +705,111 @@ void WrappedVulkan::SetDebugMessageSink(WrappedVulkan::ScopedDebugMessageSink *s Threading::SetTLSValue(debugMessageSinkTLSSlot, (void *)sink); }
+// Creates a VkEvent and records vkCmdSetEvent for it into commandBuffer, so that completion of
+// the command buffer can later be detected by polling the event. Called from vkEndCommandBuffer
+// while capturing. Does nothing if the command buffer has no callbacks registered.
+void WrappedVulkan::InsertPendingCommandBufferCallbacksEvent(VkCommandBuffer commandBuffer)
+{
+  // This occurs pre-baking as the event needs to be in the command buffer before vkEndCommandBuffer
+  // is called
+
+  VkResourceRecord *cmdRecord = GetRecord(commandBuffer);
+  VkPendingSubmissionCompleteCallbacks *pending =
+      cmdRecord->cmdInfo->pendingSubmissionCompleteCallbacks;
+  RDCASSERT(pending->event == VK_NULL_HANDLE);
+
+  if(pending->callbacks.empty())
+    return;
+
+  const VkEventCreateInfo info = {VK_STRUCTURE_TYPE_EVENT_CREATE_INFO};
+  VkEvent event = VK_NULL_HANDLE;
+  const VkResult vkr = ObjDisp(m_Device)->CreateEvent(Unwrap(m_Device), &info, NULL, &event);
+  CheckVkResult(vkr);
+
+  // on failure there is no valid handle to record or store. pending->event is left as
+  // VK_NULL_HANDLE, which AddPendingCommandBufferCallbacks checks for before queueing
+  if(vkr != VK_SUCCESS)
+    return;
+
+  // BOTTOM_OF_PIPE makes the set wait for every earlier command in the command buffer to finish.
+  // With TOP_OF_PIPE the event may be signalled while that work is still executing, which would
+  // run the completion callbacks too early.
+  // NOTE(review): vkCmdSetEvent must be recorded outside a render pass instance - confirm this
+  // can't be reached for secondary command buffers that continue a render pass.
+  ObjDisp(commandBuffer)
+      ->CmdSetEvent(Unwrap(commandBuffer), event, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT);
+
+  pending->device = cmdRecord->cmdInfo->device;
+  pending->event = event;
+}
+
+// Queues the callbacks of a submitted command buffer so CheckPendingCommandBufferCallbacks can
+// poll for their event. Takes a reference on the callback object, which is dropped again once the
+// callbacks have run.
+void WrappedVulkan::AddPendingCommandBufferCallbacks(VkCommandBuffer commandBuffer)
+{
+  VkResourceRecord *cmdRecord = GetRecord(commandBuffer);
+  VkPendingSubmissionCompleteCallbacks *pending =
+      cmdRecord->bakedCommands->cmdInfo->pendingSubmissionCompleteCallbacks;
+
+  if(pending->callbacks.empty())
+    return;
+
+  RDCASSERT(pending->event != VK_NULL_HANDLE);
+
+  // with no event (e.g. creation failed at vkEndCommandBuffer) there is nothing to poll, and
+  // vkGetEventStatus must not be handed a null handle
+  if(pending->event == VK_NULL_HANDLE)
+    return;
+
+  pending->AddRef();
+
+  SCOPED_LOCK(m_PendingCmdBufferCallbacksLock);
+  m_PendingCmdBufferCallbacks.push_back(pending);
+}
+
+// Polls the event of each queued entry. For every event that is set, the entry's callbacks are
+// run, the reference taken by AddPendingCommandBufferCallbacks is released and the entry is
+// removed. Entries whose event is still unsignalled stay queued for a later call.
+void WrappedVulkan::CheckPendingCommandBufferCallbacks()
+{
+  // This approach is bad for contention, so a future optimisation could be to:
+  // 1. Acquire the lock
+  // 2. Move m_PendingCmdBufferCallbacks into a local
+  // 3. Release the lock
+  // 4. Do the checks/execution
+  // 5. Acquire the lock
+  // 6. Merge any remaining entries to m_PendingCmdBufferCallbacks (which may have accumulated new
+  //    entries from other threads)
+  // 7. Release the lock
+
+  SCOPED_LOCK(m_PendingCmdBufferCallbacksLock);
+
+  for(size_t i = 0; i < m_PendingCmdBufferCallbacks.size();)
+  {
+    VkPendingSubmissionCompleteCallbacks *pending = m_PendingCmdBufferCallbacks[i];
+
+    const VkResult vkr = ObjDisp(m_Device)->GetEventStatus(Unwrap(m_Device), pending->event);
+    if(vkr == VK_EVENT_SET)
+    {
+      for(std::function<void()> &callback : pending->callbacks)
+        callback();
+
+      pending->Release();
+      // the loop continues without ++i, so the same index is re-examined after the erase
+      m_PendingCmdBufferCallbacks.erase(i);
+      continue;
+    }
+    else if(vkr != VK_EVENT_RESET)
+    {
+      // neither set nor reset: the result is handed to CheckVkResult and the entry stays queued
+      CheckVkResult(vkr);
+    }
+
+    ++i;
+  }
+}
+
 byte *WrappedVulkan::GetRingTempMemory(size_t s) { TempMem *mem = (TempMem *)Threading::GetTLSValue(tempMemoryTLSSlot); @@ -2268,6 +2373,7 @@ void WrappedVulkan::StartFrameCapture(DeviceOwnedWindow devWnd) } m_PreparedNotSerialisedInitStates.clear(); + CheckPendingCommandBufferCallbacks(); GetResourceManager()->PrepareInitialContents(); { diff --git a/renderdoc/driver/vulkan/vk_core.h b/renderdoc/driver/vulkan/vk_core.h index e0b8078a97..fe083fb50a 100644 --- a/renderdoc/driver/vulkan/vk_core.h +++ b/renderdoc/driver/vulkan/vk_core.h @@ -979,6 +979,12 @@ class WrappedVulkan : public IFrameCapturer bytebuf m_MaskedMapData; + Threading::CriticalSection m_PendingCmdBufferCallbacksLock; + rdcarray<VkPendingSubmissionCompleteCallbacks *> m_PendingCmdBufferCallbacks; + void InsertPendingCommandBufferCallbacksEvent(VkCommandBuffer commandBuffer); + void AddPendingCommandBufferCallbacks(VkCommandBuffer commandBuffer); + void CheckPendingCommandBufferCallbacks(); + GPUAddressRangeTracker m_AddressTracker; GPUAddressRange CreateAddressRange(VkDevice device, VkBuffer buffer); diff --git a/renderdoc/driver/vulkan/vk_resources.cpp b/renderdoc/driver/vulkan/vk_resources.cpp index 335b414c5e..243ce48621 100644 --- a/renderdoc/driver/vulkan/vk_resources.cpp +++ 
b/renderdoc/driver/vulkan/vk_resources.cpp @@ -3391,6 +3391,19 @@ VkImageAspectFlags FormatImageAspects(VkFormat fmt) return VK_IMAGE_ASPECT_COLOR_BIT; } +void VkPendingSubmissionCompleteCallbacks::Release() +{ + int32_t ref = Atomic::Dec32(&refCount); + RDCASSERT(ref >= 0); + if(ref <= 0) + { + if(event != VK_NULL_HANDLE) + ObjDisp(device)->DestroyEvent(Unwrap(device), event, NULL); + + delete this; + } +} + RenderPassInfo::RenderPassInfo(const VkRenderPassCreateInfo &ci) { // *2 in case we need separate barriers for depth and stencil, +1 for the terminating null @@ -3843,12 +3856,12 @@ InitReqType ImgRefs::SubresourceRangeMaxInitReq(VkImageSubresourceRange range, I return initReq; } -rdcarray > ImgRefs::SubresourceRangeInitReqs( +rdcarray> ImgRefs::SubresourceRangeInitReqs( VkImageSubresourceRange range, InitPolicy policy, bool initialized) const { VkImageSubresourceRange out(range); - rdcarray > res; - rdcarray > splitAspects; + rdcarray> res; + rdcarray> splitAspects; if(areAspectsSplit) { int aspectIndex = 0; @@ -4825,7 +4838,7 @@ TEST_CASE("Vulkan formats", "[format][vulkan]") { const uint32_t width = 24, height = 24; - rdcarray > > tests = { + rdcarray>> tests = { {VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM, {576, 144, 144}}, {VK_FORMAT_G8_B8R8_2PLANE_420_UNORM, {576, 288}}, {VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM, {576, 288, 288}}, @@ -4852,7 +4865,7 @@ TEST_CASE("Vulkan formats", "[format][vulkan]") {VK_FORMAT_G16_B16R16_2PLANE_444_UNORM, {1152, 2304}}, }; - for(rdcpair > e : tests) + for(rdcpair> e : tests) { INFO("Format is " << ToStr(e.first)); for(uint32_t p = 0; p < e.second.size(); p++) diff --git a/renderdoc/driver/vulkan/vk_resources.h b/renderdoc/driver/vulkan/vk_resources.h index f70c8b34b6..0ad5965b14 100644 --- a/renderdoc/driver/vulkan/vk_resources.h +++ b/renderdoc/driver/vulkan/vk_resources.h @@ -1088,6 +1088,24 @@ struct MemRefs struct ImgRefs; struct ImageState; +class VkPendingSubmissionCompleteCallbacks +{ +public: + 
VkPendingSubmissionCompleteCallbacks() = default; + VkPendingSubmissionCompleteCallbacks(const VkPendingSubmissionCompleteCallbacks &) = delete; + VkPendingSubmissionCompleteCallbacks(VkPendingSubmissionCompleteCallbacks &&) = delete; + + void AddRef() { Atomic::Inc32(&refCount); } + void Release(); + + VkDevice device = VK_NULL_HANDLE; + VkEvent event = VK_NULL_HANDLE; + rdcarray> callbacks; + +private: + int32_t refCount = 1; +}; + struct CmdPoolInfo { CmdPoolInfo() : pool(4 * 1024) {} @@ -1104,13 +1122,19 @@ struct CmdPoolInfo struct CmdBufferRecordingInfo { - CmdBufferRecordingInfo(CmdPoolInfo &pool) : alloc(pool.pool) {} + CmdBufferRecordingInfo(CmdPoolInfo &pool) + : alloc(pool.pool), + pendingSubmissionCompleteCallbacks(new VkPendingSubmissionCompleteCallbacks()) + { + } CmdBufferRecordingInfo(const CmdBufferRecordingInfo &) = delete; CmdBufferRecordingInfo(CmdBufferRecordingInfo &&) = delete; CmdBufferRecordingInfo &operator=(const CmdBufferRecordingInfo &) = delete; ~CmdBufferRecordingInfo() { // nothing to do explicitly, the alloc destructor will clean up any pages it holds + // pendingSubmissionCompleteCallbacks manages itself via ref-counting + pendingSubmissionCompleteCallbacks->Release(); } VkDevice device; @@ -1144,6 +1168,9 @@ struct CmdBufferRecordingInfo // A list of acceleration structures that this command buffer will build or copy rdcarray accelerationStructures; + // The VkEvent and the list of callbacks to be executed once it has been signalled + VkPendingSubmissionCompleteCallbacks *pendingSubmissionCompleteCallbacks = NULL; + // AdvanceFrame/Present should be called after this buffer is submitted bool present; // BeginFrameCapture should be called *before* this buffer is submitted. 
@@ -2247,6 +2274,8 @@ struct VkResourceRecord : public ResourceRecord cmdInfo->imageStates.swap(bakedCommands->cmdInfo->imageStates); cmdInfo->memFrameRefs.swap(bakedCommands->cmdInfo->memFrameRefs); cmdInfo->accelerationStructures.swap(bakedCommands->cmdInfo->accelerationStructures); + std::swap(cmdInfo->pendingSubmissionCompleteCallbacks, + bakedCommands->cmdInfo->pendingSubmissionCompleteCallbacks); } // we have a lot of 'cold' data in the resource record, as it can be accessed diff --git a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp index 2c93d9ba7f..97087c0150 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_cmd_funcs.cpp @@ -1378,7 +1378,7 @@ VkResult WrappedVulkan::vkBeginCommandBuffer(VkCommandBuffer commandBuffer, if(record) { - // If a command bfufer was already recorded (ie we have some baked commands), + // If a command buffer was already recorded (ie we have some baked commands), // then begin is spec'd to implicitly reset. That means we need to tidy up // any existing baked commands before creating a new set. 
if(record->bakedCommands) @@ -1670,6 +1670,9 @@ VkResult WrappedVulkan::vkEndCommandBuffer(VkCommandBuffer commandBuffer) VkResourceRecord *record = GetRecord(commandBuffer); RDCASSERT(record); + if(IsCaptureMode(m_State)) + InsertPendingCommandBufferCallbacksEvent(commandBuffer); + VkResult ret; SERIALISE_TIME_CALL(ret = ObjDisp(commandBuffer)->EndCommandBuffer(Unwrap(commandBuffer))); diff --git a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp index e67eaf45de..4e0c06451e 100644 --- a/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp +++ b/renderdoc/driver/vulkan/wrappers/vk_queue_funcs.cpp @@ -1043,6 +1043,8 @@ void WrappedVulkan::CaptureQueueSubmit(VkQueue queue, record->bakedCommands->AddRef(); } + + AddPendingCommandBufferCallbacks(commandBuffers[i]); } if(backframe) @@ -1337,6 +1339,8 @@ void WrappedVulkan::CaptureQueueSubmit(VkQueue queue, for(VkResourceRecord *asRecord : accelerationStructures) asRecord->accelerationStructureInfo->accelerationStructureBuilt = true; + + CheckPendingCommandBufferCallbacks(); } template