[ET-VK] 4/n Split dispatches between multiple command buffers. Removing flush function call from submit_current_cmd_and_wait. #12525

Merged
merged 19 commits on Jul 17, 2025

Commits
bb3c625
[ET-VK] Fix caching mechanism to account for included files
SS-JIA Jul 14, 2025
5892e42
[ET-VK] Split up prepack command buffer
SS-JIA Jul 14, 2025
641157c
Update base for Update on "[ET-VK] Split up prepack command buffer"
SS-JIA Jul 14, 2025
e447656
Update on "[ET-VK] Split up prepack command buffer"
SS-JIA Jul 14, 2025
6bf4695
[ET-VK] 1/n Split dispatches between multiple command buffers. Add se…
trivedivivek Jul 15, 2025
261d821
[ET-VK] 2/n Split dispatches between multiple command buffers. Add se…
trivedivivek Jul 15, 2025
33d3e29
Update on "[ET-VK] 2/n Split dispatches between multiple command buff…
trivedivivek Jul 15, 2025
ec8cc51
[ET-VK] 3/n Split dispatches between multiple command buffers. Track …
trivedivivek Jul 15, 2025
c15cdef
[ET-VK] 4/n Split dispatches between multiple command buffers. Removi…
trivedivivek Jul 15, 2025
13df91b
Update base for Update on "[ET-VK] 4/n Split dispatches between multi…
trivedivivek Jul 15, 2025
60b67eb
Update on "[ET-VK] 4/n Split dispatches between multiple command buff…
trivedivivek Jul 15, 2025
a940cef
Update base for Update on "[ET-VK] 4/n Split dispatches between multi…
trivedivivek Jul 16, 2025
58ffddd
Update on "[ET-VK] 4/n Split dispatches between multiple command buff…
trivedivivek Jul 16, 2025
6e7eb7d
Update base for Update on "[ET-VK] 4/n Split dispatches between multi…
trivedivivek Jul 16, 2025
be74c21
Update on "[ET-VK] 4/n Split dispatches between multiple command buff…
trivedivivek Jul 16, 2025
ef207e0
Update base for Update on "[ET-VK] 4/n Split dispatches between multi…
trivedivivek Jul 17, 2025
877e765
Update on "[ET-VK] 4/n Split dispatches between multiple command buff…
trivedivivek Jul 17, 2025
07dc1a1
Update base for Update on "[ET-VK] 4/n Split dispatches between multi…
trivedivivek Jul 17, 2025
fce21fe
Update on "[ET-VK] 4/n Split dispatches between multiple command buff…
trivedivivek Jul 17, 2025

Changes from all commits
18 changes: 16 additions & 2 deletions backends/vulkan/runtime/api/Context.cpp
@@ -38,7 +38,8 @@ Context::Context(vkapi::Adapter* adapter, const ContextConfig& config)
querypool_(config_.query_pool_config, nullptr),
// Command buffer submission
cmd_mutex_{},
cmd_(VK_NULL_HANDLE, 0u),
cmd_(VK_NULL_HANDLE, VK_NULL_HANDLE, 0u),
prev_semaphore_(VK_NULL_HANDLE),
submit_count_{0u},
// Memory Management
buffer_clearlist_mutex_{},
@@ -195,10 +196,21 @@ void Context::register_blit(
}

void Context::submit_cmd_to_gpu(VkFence fence_handle, const bool final_use) {
// Wait semaphore would be previous command buffer's signal semaphore
VkSemaphore wait_semaphore = prev_semaphore_;
// Signal semaphore for the current command buffer
VkSemaphore signal_semaphore = cmd_.get_signal_semaphore();
// Next command buffer would wait on this command buffer's signal semaphore
prev_semaphore_ = signal_semaphore;

if (cmd_) {
cmd_.end();
adapter_p_->submit_cmd(
queue_, cmd_.get_submit_handle(final_use), fence_handle);
queue_,
cmd_.get_submit_handle(final_use),
fence_handle,
wait_semaphore,
signal_semaphore);

submit_count_ = 0u;
}
@@ -214,6 +226,8 @@ void Context::flush() {
if (cmd_) {
cmd_.invalidate();
}
// Reset previous command buffer semaphore
prev_semaphore_ = VK_NULL_HANDLE;

std::lock_guard<std::mutex> bufferlist_lock(buffer_clearlist_mutex_);
std::lock_guard<std::mutex> imagelist_lock(image_clearlist_mutex_);
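
The change above threads a binary semaphore between consecutive submissions: each command buffer signals its own semaphore, and the next submission waits on it, so work split across multiple command buffers still executes in order on the GPU. Below is a minimal, self-contained sketch of that chaining pattern in raw Vulkan; the function name, the per-buffer semaphore vector, and the single trailing fence are illustrative assumptions, not part of this PR.

#include <vulkan/vulkan.h>
#include <vector>

// Illustrative sketch: submit pre-recorded command buffers so that each
// submission waits on the semaphore signaled by the previous one, mirroring
// how prev_semaphore_ is threaded through Context::submit_cmd_to_gpu.
// Error handling (VkResult checks) is omitted for brevity.
void submit_chained(
    VkQueue queue,
    const std::vector<VkCommandBuffer>& cmds,
    const std::vector<VkSemaphore>& semaphores, // one semaphore per command buffer
    VkFence last_fence) {
  VkSemaphore prev = VK_NULL_HANDLE;
  for (size_t i = 0; i < cmds.size(); ++i) {
    const VkPipelineStageFlags wait_stage = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
    const bool has_wait = prev != VK_NULL_HANDLE;
    const bool is_last = (i + 1 == cmds.size());
    const VkSubmitInfo submit_info{
        VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
        nullptr, // pNext
        has_wait ? 1u : 0u, // waitSemaphoreCount
        has_wait ? &prev : nullptr, // pWaitSemaphores
        &wait_stage, // pWaitDstStageMask
        1u, // commandBufferCount
        &cmds[i], // pCommandBuffers
        1u, // signalSemaphoreCount
        &semaphores[i], // pSignalSemaphores
    };
    // Only the last submission in the chain is tracked with a host-visible fence.
    vkQueueSubmit(queue, 1u, &submit_info, is_last ? last_fence : VK_NULL_HANDLE);
    prev = semaphores[i]; // the next submission waits on this semaphore
  }
}
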
2 changes: 2 additions & 0 deletions backends/vulkan/runtime/api/Context.h
@@ -68,6 +68,8 @@ class Context final {
// Command buffers submission
std::mutex cmd_mutex_;
vkapi::CommandBuffer cmd_;
// Semaphore for the previously submitted command buffer, if any
VkSemaphore prev_semaphore_;
uint32_t submit_count_;
// Memory Management
std::mutex buffer_clearlist_mutex_;
10 changes: 7 additions & 3 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -765,8 +765,6 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
fence.wait();
context_->fences().return_fence(fence);

context_->flush();
}

void ComputeGraph::prepack() {
@@ -787,7 +785,12 @@ void ComputeGraph::prepack() {
// Otherwise, just submit the current command buffer for execution and
// proceed. This results in lower load latency at the cost of higher peak
// memory usage.
reduce_peak_memory ? submit_current_cmd_and_wait() : submit_current_cmd();
if (reduce_peak_memory) {
submit_current_cmd_and_wait();
context_->flush();
} else {
submit_current_cmd();
}
staging_nbytes_in_cmd_ = 0;
context_->set_cmd();
submitted = true;
@@ -797,6 +800,7 @@
i++;
}
submit_current_cmd_and_wait(/*final_use=*/true);
context_->flush();
staging_nbytes_in_cmd_ = 0;
}

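
For reference, the wait performed by submit_current_cmd_and_wait boils down to a fence that the host blocks on; reclaiming staging memory is now a separate, explicit context_->flush() at the call sites shown above. A rough raw-Vulkan equivalent of the submit-and-wait half, assuming a single pre-recorded command buffer and a freshly created fence (both illustrative):

#include <vulkan/vulkan.h>
#include <cstdint>

// Illustrative sketch of submit-and-wait: the fence is signaled when the
// submitted work finishes, and vkWaitForFences blocks the host until then.
// Error handling (VkResult checks) is omitted for brevity.
void submit_and_wait(VkDevice device, VkQueue queue, VkCommandBuffer cmd) {
  const VkFenceCreateInfo fence_info{
      VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, nullptr, 0u};
  VkFence fence = VK_NULL_HANDLE;
  vkCreateFence(device, &fence_info, nullptr, &fence);

  const VkSubmitInfo submit_info{
      VK_STRUCTURE_TYPE_SUBMIT_INFO, nullptr,
      0u, nullptr, nullptr, // no wait semaphores
      1u, &cmd, // one command buffer
      0u, nullptr}; // no signal semaphores
  vkQueueSubmit(queue, 1u, &submit_info, fence);

  // Block until the GPU has finished executing cmd, then release the fence.
  vkWaitForFences(device, 1u, &fence, VK_TRUE, UINT64_MAX);
  vkDestroyFence(device, fence, nullptr);
}
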
3 changes: 1 addition & 2 deletions backends/vulkan/runtime/graph/ComputeGraph.h
@@ -847,8 +847,7 @@ class ComputeGraph final {

/*
* Submits the current command buffer in the Context to the GPU for execution,
* and wait for it to complete before returning. This function will also flush
* the Context after execution.
* and waits for it to complete before returning.
*/
void submit_current_cmd_and_wait(const bool final_use = false);

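
With the flush removed from this helper, call sites that need resources reclaimed pair the two calls explicitly, as the prepack() changes above do. Illustrative caller-side pattern:

// After the wait returns, reclaiming staging memory and command buffer
// resources is the caller's responsibility.
submit_current_cmd_and_wait(/*final_use=*/true);
context_->flush();
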
17 changes: 11 additions & 6 deletions backends/vulkan/runtime/vk_api/Adapter.cpp
@@ -307,17 +307,22 @@ void Adapter::return_queue(Adapter::Queue& compute_queue) {
void Adapter::submit_cmd(
const Adapter::Queue& device_queue,
VkCommandBuffer cmd,
VkFence fence) {
VkFence fence,
VkSemaphore wait_semaphore,
VkSemaphore signal_semaphore) {
const VkPipelineStageFlags flags = VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT;
const bool set_wait_semaphore = wait_semaphore != VK_NULL_HANDLE;
const bool set_signal_semaphore = signal_semaphore != VK_NULL_HANDLE;
const VkSubmitInfo submit_info{
VK_STRUCTURE_TYPE_SUBMIT_INFO, // sType
nullptr, // pNext
0u, // waitSemaphoreCount
nullptr, // pWaitSemaphores
nullptr, // pWaitDstStageMask
set_wait_semaphore ? 1u : 0u, // waitSemaphoreCount
set_wait_semaphore ? &wait_semaphore : nullptr, // pWaitSemaphores
&flags, // pWaitDstStageMask
1u, // commandBufferCount
&cmd, // pCommandBuffers
0u, // signalSemaphoreCount
nullptr, // pSignalSemaphores
set_signal_semaphore ? 1u : 0u, // signalSemaphoreCount
set_signal_semaphore ? &signal_semaphore : nullptr, // pSignalSemaphores
};

std::lock_guard<std::mutex> queue_lock(
8 changes: 6 additions & 2 deletions backends/vulkan/runtime/vk_api/Adapter.h
@@ -242,8 +242,12 @@ class Adapter final {

// Command Buffer Submission

void
submit_cmd(const Queue&, VkCommandBuffer, VkFence fence = VK_NULL_HANDLE);
void submit_cmd(
const Queue&,
VkCommandBuffer,
VkFence fence = VK_NULL_HANDLE,
VkSemaphore wait_semaphore = VK_NULL_HANDLE,
VkSemaphore signal_semaphore = VK_NULL_HANDLE);

std::string stringize() const;
friend std::ostream& operator<<(std::ostream&, const Adapter&);
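
A hypothetical pair of calls showing how the new defaulted parameters compose; adapter, queue, cmd_a, cmd_b, sem_a, sem_b, and fence are illustrative names, not identifiers from this PR:

// First submission in a chain: nothing to wait on yet, signals sem_a.
adapter->submit_cmd(queue, cmd_a, VK_NULL_HANDLE, VK_NULL_HANDLE, sem_a);
// Second submission: waits on sem_a, signals sem_b, and is tracked by a fence.
adapter->submit_cmd(queue, cmd_b, fence, sem_a, sem_b);
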
29 changes: 27 additions & 2 deletions backends/vulkan/runtime/vk_api/Command.cpp
@@ -20,28 +20,34 @@ namespace vkapi {

CommandBuffer::CommandBuffer(
VkCommandBuffer handle,
VkSemaphore semaphore,
const VkCommandBufferUsageFlags flags)
: handle_(handle),
signal_semaphore_(semaphore),
flags_(flags),
state_(CommandBuffer::State::NEW),
bound_{} {}

CommandBuffer::CommandBuffer(CommandBuffer&& other) noexcept
: handle_(other.handle_),
signal_semaphore_(other.signal_semaphore_),
flags_(other.flags_),
state_(CommandBuffer::State::INVALID),
state_(other.state_),
bound_(other.bound_) {
other.handle_ = VK_NULL_HANDLE;
other.signal_semaphore_ = VK_NULL_HANDLE;
other.bound_.reset();
}

CommandBuffer& CommandBuffer::operator=(CommandBuffer&& other) noexcept {
handle_ = other.handle_;
signal_semaphore_ = other.signal_semaphore_;
flags_ = other.flags_;
state_ = other.state_;
bound_ = other.bound_;

other.handle_ = VK_NULL_HANDLE;
other.signal_semaphore_ = VK_NULL_HANDLE;
other.bound_.reset();
other.state_ = CommandBuffer::State::INVALID;

@@ -304,6 +310,12 @@ CommandPool::~CommandPool() {
if (pool_ == VK_NULL_HANDLE) {
return;
}
for (auto& semaphore : semaphores_) {
if (semaphore != VK_NULL_HANDLE) {
vkDestroySemaphore(device_, semaphore, nullptr);
}
}

vkDestroyCommandPool(device_, pool_, nullptr);
}

@@ -314,14 +326,15 @@ CommandBuffer CommandPool::get_new_cmd(bool reusable) {
allocate_new_batch(config_.cmd_pool_batch_size);

VkCommandBuffer handle = buffers_[in_use_];
VkSemaphore semaphore = semaphores_[in_use_];

VkCommandBufferUsageFlags cmd_flags = 0u;
if (!reusable) {
cmd_flags |= VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
}

in_use_++;
return CommandBuffer(handle, cmd_flags);
return CommandBuffer(handle, semaphore, cmd_flags);
}

void CommandPool::flush() {
@@ -337,6 +350,7 @@ void CommandPool::allocate_new_batch(const uint32_t count) {
}

buffers_.resize(buffers_.size() + count);
semaphores_.resize(buffers_.size() + count);

const VkCommandBufferAllocateInfo allocate_info{
VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // sType
Expand All @@ -348,6 +362,17 @@ void CommandPool::allocate_new_batch(const uint32_t count) {

VK_CHECK(vkAllocateCommandBuffers(
device_, &allocate_info, buffers_.data() + in_use_));

const VkSemaphoreCreateInfo semaphoreCreateInfo = {
VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0};

for (uint32_t i = 0; i < count; i++) {
VK_CHECK(vkCreateSemaphore(
device_,
&semaphoreCreateInfo,
nullptr,
semaphores_.data() + in_use_ + i));
}
}

} // namespace vkapi
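
The pool now owns one binary semaphore per command buffer, created alongside each allocated batch and destroyed together with the pool. A compact sketch of that pairing (an illustration, not the CommandPool implementation itself); in Vulkan, destroying the pool frees the command buffers allocated from it, so only the semaphores need explicit teardown:

#include <vulkan/vulkan.h>
#include <vector>

// Illustrative sketch: allocate a batch of command buffers together with one
// binary semaphore per buffer, and tear both down with the pool.
struct PooledBatch {
  VkDevice device = VK_NULL_HANDLE;
  VkCommandPool pool = VK_NULL_HANDLE;
  std::vector<VkCommandBuffer> buffers;
  std::vector<VkSemaphore> semaphores;

  void allocate(uint32_t count) {
    const size_t first = buffers.size();
    buffers.resize(first + count);
    semaphores.resize(first + count);

    const VkCommandBufferAllocateInfo alloc_info{
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, nullptr,
        pool, VK_COMMAND_BUFFER_LEVEL_PRIMARY, count};
    vkAllocateCommandBuffers(device, &alloc_info, buffers.data() + first);

    const VkSemaphoreCreateInfo sem_info{
        VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO, nullptr, 0u};
    for (uint32_t i = 0; i < count; ++i) {
      vkCreateSemaphore(device, &sem_info, nullptr, &semaphores[first + i]);
    }
  }

  ~PooledBatch() {
    for (VkSemaphore s : semaphores) {
      if (s != VK_NULL_HANDLE) {
        vkDestroySemaphore(device, s, nullptr);
      }
    }
    if (pool != VK_NULL_HANDLE) {
      // Destroying the pool frees all command buffers allocated from it.
      vkDestroyCommandPool(device, pool, nullptr);
    }
  }
};
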
14 changes: 13 additions & 1 deletion backends/vulkan/runtime/vk_api/Command.h
@@ -26,7 +26,10 @@ namespace vkapi {

class CommandBuffer final {
public:
explicit CommandBuffer(VkCommandBuffer, const VkCommandBufferUsageFlags);
explicit CommandBuffer(
VkCommandBuffer,
VkSemaphore,
const VkCommandBufferUsageFlags);

CommandBuffer(const CommandBuffer&) = delete;
CommandBuffer& operator=(const CommandBuffer&) = delete;
@@ -70,6 +73,8 @@ class CommandBuffer final {

private:
VkCommandBuffer handle_;
// Semaphore to signal when the command buffer has completed execution
VkSemaphore signal_semaphore_;
VkCommandBufferUsageFlags flags_;
State state_;
Bound bound_;
@@ -81,6 +86,7 @@

inline void invalidate() {
handle_ = VK_NULL_HANDLE;
signal_semaphore_ = VK_NULL_HANDLE;
bound_.reset();
}

@@ -100,6 +106,10 @@

VkCommandBuffer get_submit_handle(const bool final_use = false);

VkSemaphore get_signal_semaphore() const {
return signal_semaphore_;
}

inline operator bool() const {
return handle_ != VK_NULL_HANDLE;
}
@@ -130,6 +140,8 @@ class CommandPool final {
// New Buffers
std::mutex mutex_;
std::vector<VkCommandBuffer> buffers_;
// Semaphores corresponding to the command buffers
std::vector<VkSemaphore> semaphores_;
size_t in_use_;

public: