From 740ba2e6b02a591fe989580699e4764223786533 Mon Sep 17 00:00:00 2001 From: Vivek Trivedi <5340687+trivedivivek@users.noreply.github.com> Date: Tue, 15 Jul 2025 15:30:12 -0700 Subject: [PATCH] [ET-VK] 5/n Split dispatches between multiple command buffers. Track previously submitted command buffers in context and add function to execute all previous command buffers. This diff stores the command buffers that are submitted with `final_use` set to false. Storing these buffers is necessary for the `execute()` function: since `encode_execute()` is typically called once but `execute()` can be called multiple times, the `submit_all_non_final_cmds` function is added so that all recorded command buffers with `final_use = false` can be resubmitted on each subsequent `execute()` call. #### Key Changes * Added a flag `execute_pending_first_submission` to the `ComputeGraph` class to track whether execute nodes have been freshly encoded and need to be submitted first. * Added a new function `submit_all_non_final_cmds` to the `Context` class, which submits all non-final command buffers to the GPU. * Modified the `submit_cmd_to_gpu` function to add the submitted command buffer to the `non_final_cmds_` list if it is not marked as final use. * Updated the `execute` function in `ComputeGraph` to submit the freshly encoded command buffer on the first call and to resubmit all stored non-final command buffers on subsequent calls. 
Differential Revision: [D78360038](https://our.internmc.facebook.com/intern/diff/D78360038/) [ghstack-poisoned] --- backends/vulkan/runtime/api/Context.cpp | 30 +++++++++++++++++++ backends/vulkan/runtime/api/Context.h | 4 +++ .../vulkan/runtime/graph/ComputeGraph.cpp | 17 ++++++++--- backends/vulkan/runtime/graph/ComputeGraph.h | 4 +++ 4 files changed, 51 insertions(+), 4 deletions(-) diff --git a/backends/vulkan/runtime/api/Context.cpp b/backends/vulkan/runtime/api/Context.cpp index 64d940d44fb..9a04a94ad77 100644 --- a/backends/vulkan/runtime/api/Context.cpp +++ b/backends/vulkan/runtime/api/Context.cpp @@ -214,6 +214,31 @@ void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) { submit_count_ = 0u; } + + if (!final_use) { + non_final_cmds_.emplace_back(std::move(cmd_)); + } +} + +void Context::submit_all_non_final_cmds(VkFence fence_handle) { + VkSemaphore local_prev_semaphore = VK_NULL_HANDLE; + + for (uint32_t i = 0; i < non_final_cmds_.size(); i++) { + auto& cmd = non_final_cmds_[i]; + VkSemaphore wait_semaphore = local_prev_semaphore; + VkSemaphore signal_semaphore = cmd.get_signal_semaphore(); + local_prev_semaphore = signal_semaphore; + + if (cmd) { + cmd.end(); + adapter_p_->submit_cmd( + queue_, + cmd.get_submit_handle(false), + i == (non_final_cmds_.size() - 1) ? 
fence_handle : VK_NULL_HANDLE, + wait_semaphore, + signal_semaphore); + } + } } void Context::flush() { @@ -222,6 +247,11 @@ void Context::flush() { command_pool_.flush(); descriptor_pool_.flush(); + for (auto& cmd : non_final_cmds_) { + cmd.invalidate(); + } + non_final_cmds_.clear(); + // If there is an existing command buffer, invalidate it if (cmd_) { cmd_.invalidate(); diff --git a/backends/vulkan/runtime/api/Context.h b/backends/vulkan/runtime/api/Context.h index 9d8e7c92255..c6a1592455c 100644 --- a/backends/vulkan/runtime/api/Context.h +++ b/backends/vulkan/runtime/api/Context.h @@ -68,6 +68,8 @@ class Context final { // Command buffers submission std::mutex cmd_mutex_; vkapi::CommandBuffer cmd_; + // List of submitted command buffers, not marked as final use. + std::vector non_final_cmds_; // Semaphore for the previously submitted command buffer, if any VkSemaphore prev_semaphore_; uint32_t submit_count_; @@ -230,6 +232,8 @@ class Context final { VkFence fence_handle = VK_NULL_HANDLE, const bool final_use = false); + void submit_all_non_final_cmds(VkFence fence_handle = VK_NULL_HANDLE); + void flush(); #ifdef VULKAN_DEBUG diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp index e0e245c7641..8914f49a8ea 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.cpp +++ b/backends/vulkan/runtime/graph/ComputeGraph.cpp @@ -815,13 +815,22 @@ void ComputeGraph::encode_execute() { for (std::unique_ptr& node : execute_nodes_) { node->encode(this); } + + // Indicate execute nodes have been freshly encoded and needs to be submitted + // first + execute_pending_first_submission = true; } void ComputeGraph::execute() { - vkapi::VulkanFence fence = context_->fences().get_fence(); - context_->submit_cmd_to_gpu(fence.get_submit_handle()); - fence.wait(); - context_->fences().return_fence(fence); + if (execute_pending_first_submission) { + submit_current_cmd_and_wait(/*final_use=*/false); + 
execute_pending_first_submission = false; + } else { + vkapi::VulkanFence fence = context_->fences().get_fence(); + context_->submit_all_non_final_cmds(fence.get_submit_handle()); + fence.wait(); + context_->fences().return_fence(fence); + } execute_count_++; } diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h index 8de28f264fb..763aa42d274 100644 --- a/backends/vulkan/runtime/graph/ComputeGraph.h +++ b/backends/vulkan/runtime/graph/ComputeGraph.h @@ -198,6 +198,10 @@ class ComputeGraph final { // current Context's command buffer is submitted now. size_t staging_nbytes_in_cmd_ = 0; + // Flag to indicate if execute nodes have been freshly encoded and have not + // been submitted yet. + bool execute_pending_first_submission = true; + public: // // Accessors