Skip to content

Commit 5aa78c7

Browse files
committed
[ET-VK] 5/n Split dispatches between multiple command buffers. Add support to defer command buffers in encode_execute function and submit all deferred commands in execute function.
Pull Request resolved: #12527 This diff adds changes to store command buffers submitted with `final_use` set to false. Storing these buffers is necessary for the `execute()` function. Since the `encode_execute()` function is typically called once but `execute()` can be called multiple times, the `submit_all_non_final_cmds` function is added so that all recorded command buffers with `final_use = False` can be submitted multiple times in `execute()`. #### Key Changes * Added a flag `execute_pending_first_submission` to the `ComputeGraph` class to track whether execute nodes have been freshly encoded and need to be submitted first. * Added a new function `submit_all_non_final_cmds` to the `Context` class, which submits all non-final command buffers to the GPU. * Modified the `submit_cmd_to_gpu` function to add the submitted command buffer to the `non_final_cmds_` list if it's not marked as final use. * Updated the `execute` function in `ComputeGraph` to submit all non-final command buffers before executing the graph. ghstack-source-id: 296562130 @exported-using-ghexport Differential Revision: [D78360038](https://our.internmc.facebook.com/intern/diff/D78360038/)
1 parent 4094539 commit 5aa78c7

File tree

4 files changed

+44
-7
lines changed

4 files changed

+44
-7
lines changed

backends/vulkan/runtime/api/Context.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -217,7 +217,7 @@ void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
217217
}
218218

219219
void Context::flush() {
220-
VK_CHECK(vkQueueWaitIdle(queue()));
220+
VK_CHECK(vkQueueWaitIdle(queue().handle));
221221

222222
command_pool_.flush();
223223
descriptor_pool_.flush();

backends/vulkan/runtime/api/Context.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -90,8 +90,8 @@ class Context final {
9090
return device_;
9191
}
9292

93-
inline VkQueue queue() {
94-
return queue_.handle;
93+
inline vkapi::Adapter::Queue& queue() {
94+
return queue_;
9595
}
9696

9797
// Device Caches
@@ -230,6 +230,10 @@ class Context final {
230230
VkFence fence_handle = VK_NULL_HANDLE,
231231
const bool final_use = false);
232232

233+
vkapi::CommandBuffer& extract_cmd() {
234+
return cmd_;
235+
}
236+
233237
void flush();
234238

235239
#ifdef VULKAN_DEBUG

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 29 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -158,6 +158,7 @@ ComputeGraph::~ComputeGraph() {
158158

159159
prepack_nodes_.clear();
160160
execute_nodes_.clear();
161+
deferred_cmd_list_.clear();
161162

162163
context_->flush();
163164
}
@@ -767,6 +768,30 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
767768
context_->fences().return_fence(fence);
768769
}
769770

771+
void ComputeGraph::submit_deferred_cmds() {
772+
VkSemaphore prev_semaphore = VK_NULL_HANDLE;
773+
vkapi::VulkanFence fence = context_->fences().get_fence();
774+
775+
for (uint32_t i = 0; i < deferred_cmd_list_.size(); i++) {
776+
auto& cmd = deferred_cmd_list_[i];
777+
VkSemaphore wait_semaphore = prev_semaphore;
778+
VkSemaphore signal_semaphore = cmd.get_signal_semaphore();
779+
prev_semaphore = signal_semaphore;
780+
781+
if (cmd) {
782+
cmd.end();
783+
context_->adapter_ptr()->submit_cmd(
784+
context_->queue(),
785+
cmd.get_submit_handle(false),
786+
i == (deferred_cmd_list_.size() - 1) ? fence.get_submit_handle() : VK_NULL_HANDLE,
787+
wait_semaphore,
788+
signal_semaphore);
789+
}
790+
}
791+
fence.wait();
792+
context_->fences().return_fence(fence);
793+
}
794+
770795
void ComputeGraph::prepack() {
771796
int i = 0;
772797
bool submitted = false;
@@ -805,6 +830,7 @@ void ComputeGraph::prepack() {
805830
}
806831

807832
void ComputeGraph::encode_execute() {
833+
deferred_cmd_list_.clear();
808834
context_->flush();
809835
context_->set_cmd(/*reusable = */ true);
810836

@@ -813,13 +839,12 @@ void ComputeGraph::encode_execute() {
813839
for (std::unique_ptr<ExecuteNode>& node : execute_nodes_) {
814840
node->encode(this);
815841
}
842+
843+
deferred_cmd_list_.emplace_back(std::move(context_->extract_cmd()));
816844
}
817845

818846
void ComputeGraph::execute() {
819-
vkapi::VulkanFence fence = context_->fences().get_fence();
820-
context_->submit_cmd_to_gpu(fence.get_submit_handle());
821-
fence.wait();
822-
context_->fences().return_fence(fence);
847+
submit_deferred_cmds();
823848
execute_count_++;
824849
}
825850

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,9 @@ class ComputeGraph final {
193193
// Utility constexpr to express byte quantities
194194
constexpr static size_t MB = 1024 * 1024;
195195

196+
// List of command buffers deferred for submission
197+
std::vector<vkapi::CommandBuffer> deferred_cmd_list_;
198+
196199
protected:
197200
size_t values_in_use_ = 0;
198201
size_t execute_count_ = 0;
@@ -851,6 +854,11 @@ class ComputeGraph final {
851854
*/
852855
void submit_current_cmd_and_wait(const bool final_use = false);
853856

857+
/*
858+
* Submits all the commands gathered in deferred_cmd_list_ to the GPU.
859+
*/
860+
void submit_deferred_cmds();
861+
854862
public:
855863
//
856864
// Graph Prepacking

0 commit comments

Comments
 (0)