Skip to content

[ET-VK] Split up prepack command buffer #12442

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 16, 2025
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions backends/vulkan/runtime/VulkanBackend.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -507,8 +507,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
compute_graph->prepare();
compute_graph->prepare_pipelines();

compute_graph->encode_prepack();
compute_graph->prepack();
compute_graph->run_prepack();

// If dynamic shapes are not expected, then the command buffer only needs to
// be encoded once. Otherwise, wait until the first inference to encode the
Expand Down
40 changes: 40 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -145,6 +145,15 @@ ComputeGraph::ComputeGraph(GraphConfig config)
execute_descriptor_counts_.descriptor_combined_sampler_count = 0;
execute_descriptor_counts_.descriptor_storage_image_count = 0;

#define MB (1024.0 * 1024.0)
// If certain graph config variables are not specified, then set them
// automatically.
if (config_.prepack_threshold_nbytes == 0) {
config_.prepack_threshold_nbytes = 20 * MB;
config_.prepack_initial_threshold_nbytes = 20 * MB;
}
#undef MB

context_->set_cmd(/*reusable = */ true);
}

Expand Down Expand Up @@ -750,6 +759,15 @@ void ComputeGraph::prepare_pipelines() {
vkapi::ComputePipelineCache::Hasher>();
}

void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
vkapi::VulkanFence fence = context_->fences().get_fence();
context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
fence.wait();
context_->fences().return_fence(fence);

context_->flush();
}

void ComputeGraph::encode_prepack() {
for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
node->encode(this);
Expand All @@ -766,6 +784,28 @@ void ComputeGraph::prepack() const {
context_->flush();
}

/*
 * Encodes every prepack node, splitting the work across multiple command
 * buffer submissions when the amount of staging data recorded in the current
 * command buffer exceeds the configured threshold.
 *
 * Before encoding a node (other than the first or last one), the running
 * staging byte count is compared against
 * config_.prepack_initial_threshold_nbytes until the first intermediate
 * submission has happened, and against config_.prepack_threshold_nbytes
 * afterwards. When the count exceeds the threshold, the current command
 * buffer is submitted and waited on, the counter is reset, and set_cmd() is
 * called on the Context before encoding continues.
 *
 * After all nodes have been encoded, the remaining command buffer is always
 * submitted and waited on, and the counter is reset.
 */
void ComputeGraph::run_prepack() {
  // Use an unsigned index so comparisons against the container size do not mix
  // signed and unsigned operands.
  const size_t num_nodes = prepack_nodes_.size();
  size_t i = 0;
  bool submitted = false;
  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
    // Do not trigger on the first or last prepack node. Written as
    // `i + 1 != num_nodes` so that no `size() - 1` subtraction is needed.
    const bool not_terminal = i != 0 && i + 1 != num_nodes;
    const size_t threshold = submitted
        ? config_.prepack_threshold_nbytes
        : config_.prepack_initial_threshold_nbytes;
    if (not_terminal && staging_nbytes_in_cmd_ > threshold) {
      submit_current_cmd_and_wait(/*final_use=*/true);
      staging_nbytes_in_cmd_ = 0;
      // NOTE(review): presumably starts a fresh command buffer for the nodes
      // that follow - confirm against Context::set_cmd().
      context_->set_cmd();
      submitted = true;
    }

    // NOTE(review): encoding is expected to grow staging_nbytes_in_cmd_ via
    // update_staging_nbytes_in_cmd() - confirm against PrepackNode::encode().
    node->encode(this);
    ++i;
  }
  // Flush whatever work is still pending in the last command buffer.
  submit_current_cmd_and_wait(/*final_use=*/true);
  staging_nbytes_in_cmd_ = 0;
}

void ComputeGraph::encode_execute() {
context_->flush();
context_->set_cmd(/*reusable = */ true);
Expand Down
25 changes: 25 additions & 0 deletions backends/vulkan/runtime/graph/ComputeGraph.h
Original file line number Diff line number Diff line change
Expand Up @@ -194,6 +194,10 @@ class ComputeGraph final {
size_t values_in_use_ = 0;
size_t execute_count_ = 0;

// Represents the amount of staging buffer data that will be copied if the
// current Context's command buffer is submitted now.
size_t staging_nbytes_in_cmd_ = 0;

public:
//
// Accessors
Expand Down Expand Up @@ -812,13 +816,34 @@ class ComputeGraph final {
copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
void copy_from_staging(const ValueRef idx, void* data, const size_t numel);

protected:
// Command Buffer Management

/*
* Submits the current command buffer in the Context to the GPU for execution,
* and waits for it to complete before returning. This function also flushes
* the Context after execution.
*/
void submit_current_cmd_and_wait(const bool final_use = false);

public:
//
// Graph Prepacking
//

/*
 * Adds staging_bytes to the running count of staging buffer data recorded
 * for the current command buffer.
 */
inline void update_staging_nbytes_in_cmd(const size_t staging_bytes) {
  staging_nbytes_in_cmd_ = staging_nbytes_in_cmd_ + staging_bytes;
}

void encode_prepack();
void prepack() const;

/*
* Executes prepacking operations to transfer model weight data from the CPU
* to GPU.
*/
void run_prepack();

//
// Graph Execution
//
Expand Down
14 changes: 14 additions & 0 deletions backends/vulkan/runtime/graph/GraphConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,20 @@ struct GraphConfig final {
// Whether or not the ComputeGraph should expect input shapes to be dynamic
bool expect_dynamic_shapes;

// Execution properties that determine specifics re: how command buffer
// submission is handled, etc. 0 means this field is not set.

// During prepacking, once this threshold is reached, submit the current
// command buffer for execution. This allows the work to be distributed over
// multiple command buffer submissions, which can improve model load
// performance and prevent crashes when loading large models.
size_t prepack_threshold_nbytes = 0;
// Threshold used for the first command buffer submission during prepacking.
// This can be set to be lower than prepack_threshold_nbytes to submit a
// command buffer for execution earlier, which can improve performance by
// taking more advantage of parallelism between the CPU and GPU.
size_t prepack_initial_threshold_nbytes = 0;

vkapi::Adapter* external_adapter;

// Generate a default graph config with pre-configured settings
Expand Down
1 change: 1 addition & 0 deletions backends/vulkan/runtime/graph/ops/PrepackNode.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,7 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
TensorRefPtr tref = graph->get_tref(tref_);
size_t numel = utils::multiply_integers(tref->sizes);
api::StagingBuffer staging(graph->context(), tref->dtype, numel);
graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
size_t nbytes = numel * vkapi::element_size(tref->dtype);
staging.copy_from(tref->data, nbytes);
return staging;
Expand Down
4 changes: 4 additions & 0 deletions backends/vulkan/runtime/vk_api/memory/Buffer.h
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,10 @@ class VulkanBuffer final {
return buffer_properties_.size;
}

// Returns mem_size() converted to size_t through utils::safe_downcast.
inline size_t mem_size_as_size_t() const {
  const auto raw_size = mem_size();
  return utils::safe_downcast<size_t>(raw_size);
}

inline bool has_memory() const {
return (memory_.allocation != VK_NULL_HANDLE);
}
Expand Down
Loading