
Commit bfd39e9

[ET-VK] Split up prepack command buffer
Differential Revision: D78275586
Pull Request resolved: #12442
1 parent 13643ce

7 files changed (+111, -3 lines)

7 files changed

+111
-3
lines changed

backends/vulkan/runtime/VulkanBackend.cpp

Lines changed: 1 addition & 2 deletions
@@ -507,8 +507,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     compute_graph->prepare();
     compute_graph->prepare_pipelines();
 
-    compute_graph->encode_prepack();
-    compute_graph->prepack();
+    compute_graph->run_prepack();
 
     // If dynamic shapes are not expected, then the command buffer only needs to
     // be encoded once. Otherwise, wait until the first inference to encode the
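With this change the backend drives prepacking through a single entry point. A minimal sketch of the resulting load-time sequence, assuming a compute_graph pointer already populated from the serialized graph (the exact surrounding backend code is in VulkanBackend.cpp and is not reproduced here):

    // Sketch only; error handling and the dynamic-shape logic referenced in the
    // diff context above are omitted.
    compute_graph->prepare();            // allocate resources for graph values
    compute_graph->prepare_pipelines();  // build the compute pipelines
    // Encodes the weight-transfer (prepack) nodes and submits them to the GPU,
    // splitting the work across multiple command buffers once the staged data
    // exceeds the configured thresholds (see ComputeGraph::run_prepack below).
    compute_graph->run_prepack();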

backends/vulkan/runtime/graph/ComputeGraph.cpp

Lines changed: 51 additions & 1 deletion
@@ -145,7 +145,12 @@ ComputeGraph::ComputeGraph(GraphConfig config)
   execute_descriptor_counts_.descriptor_combined_sampler_count = 0;
   execute_descriptor_counts_.descriptor_storage_image_count = 0;
 
-  context_->set_cmd(/*reusable = */ true);
+  // If certain graph config variables are not specified, then set them
+  // automatically.
+  if (config_.prepack_threshold_nbytes == 0) {
+    config_.prepack_threshold_nbytes = 10 * MB;
+    config_.prepack_initial_threshold_nbytes = 10 * MB;
+  }
 }
 
 ComputeGraph::~ComputeGraph() {
@@ -431,6 +436,7 @@ ValueRef ComputeGraph::add_tensorref(
   ValueRef idx(static_cast<int>(values_.size()));
   check_no_active_value_ptrs();
   values_.emplace_back(TensorRef(sizes, dtype, data));
+  total_constant_nbytes_ += values_.back().toConstTensorRef().nbytes();
   return idx;
 }
 
@@ -750,6 +756,19 @@ void ComputeGraph::prepare_pipelines() {
       vkapi::ComputePipelineCache::Hasher>();
 }
 
+void ComputeGraph::submit_current_cmd(const bool final_use) {
+  context_->submit_cmd_to_gpu(VK_NULL_HANDLE, final_use);
+}
+
+void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
+  vkapi::VulkanFence fence = context_->fences().get_fence();
+  context_->submit_cmd_to_gpu(fence.get_submit_handle(), final_use);
+  fence.wait();
+  context_->fences().return_fence(fence);
+
+  context_->flush();
+}
+
 void ComputeGraph::encode_prepack() {
   for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
     node->encode(this);
@@ -766,6 +785,37 @@ void ComputeGraph::prepack() const {
   context_->flush();
 }
 
+void ComputeGraph::run_prepack() {
+  int i = 0;
+  bool submitted = false;
+  const bool reduce_peak_memory = total_constant_nbytes_ > 500 * MB;
+  // int count = 0;
+  context_->set_cmd();
+  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
+    // Do not trigger on the first or last prepack node.
+    const bool not_terminal = i != 0 && i != (prepack_nodes_.size() - 1);
+    size_t threshold = submitted ? config_.prepack_threshold_nbytes
+                                 : config_.prepack_initial_threshold_nbytes;
+    if (not_terminal && staging_nbytes_in_cmd_ > threshold) {
+      // If reducing peak memory usage, wait for the current command buffer to
+      // finish executing and flush to recycle the staging memory. This will
+      // reduce peak memory usage, but will slightly increase load latency.
+      // Otherwise, just submit the current command buffer for execution and
+      // proceed. This results in lower load latency at the cost of higher peak
+      // memory usage.
+      reduce_peak_memory ? submit_current_cmd_and_wait() : submit_current_cmd();
+      staging_nbytes_in_cmd_ = 0;
+      context_->set_cmd();
+      submitted = true;
+    }
+
+    node->encode(this);
+    i++;
+  }
+  submit_current_cmd_and_wait(/*final_use=*/true);
+  staging_nbytes_in_cmd_ = 0;
+}
+
 void ComputeGraph::encode_execute() {
   context_->flush();
   context_->set_cmd(/*reusable = */ true);
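The submission-splitting policy above can be illustrated in isolation. Below is a minimal, self-contained sketch (not part of the commit) that stands in plain byte counts for PrepackNode staging buffers and mirrors the loop in run_prepack() with the 10 MB defaults set in the constructor; with forty hypothetical 1 MB weights it reports four command buffer submissions (three intermediate plus the final one):

#include <cstddef>
#include <cstdio>
#include <vector>

int main() {
  constexpr size_t MB = 1024 * 1024;
  const size_t threshold = 10 * MB;          // prepack_threshold_nbytes default
  const size_t initial_threshold = 10 * MB;  // prepack_initial_threshold_nbytes default

  // Hypothetical staging sizes (in bytes) of consecutive prepack nodes.
  const std::vector<size_t> staging_sizes(40, 1 * MB);

  size_t staged = 0;  // mirrors staging_nbytes_in_cmd_
  bool submitted = false;
  int submissions = 0;
  for (size_t i = 0; i < staging_sizes.size(); ++i) {
    // Never split on the first or last node, matching run_prepack().
    const bool not_terminal = i != 0 && i != staging_sizes.size() - 1;
    const size_t limit = submitted ? threshold : initial_threshold;
    if (not_terminal && staged > limit) {
      ++submissions;  // submit_current_cmd() or submit_current_cmd_and_wait()
      staged = 0;     // reset, as run_prepack() does after each submission
      submitted = true;
    }
    staged += staging_sizes[i];  // update_staging_nbytes_in_cmd() in the real code
  }
  ++submissions;  // the final submit_current_cmd_and_wait(/*final_use=*/true)
  std::printf("%d command buffer submissions\n", submissions);
  return 0;
}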

backends/vulkan/runtime/graph/ComputeGraph.h

Lines changed: 36 additions & 0 deletions
@@ -190,10 +190,20 @@ class ComputeGraph final {
       vkapi::ComputePipelineCache::Hasher>
       pipeline_descriptors_;
 
+  // Utility constexpr to express byte quantities
+  constexpr static size_t MB = 1024 * 1024;
+
 protected:
   size_t values_in_use_ = 0;
   size_t execute_count_ = 0;
 
+  // Total number of bytes needed to store model weights
+  size_t total_constant_nbytes_ = 0;
+
+  // Represents the amount of staging buffer data that will be copied if the
+  // current Context's command buffer is submitted now.
+  size_t staging_nbytes_in_cmd_ = 0;
+
 public:
   //
   // Accessors
@@ -812,13 +822,39 @@ class ComputeGraph final {
   copy_into_staging(const ValueRef idx, const void* data, const size_t numel);
   void copy_from_staging(const ValueRef idx, void* data, const size_t numel);
 
+ protected:
+  // Command Buffer Management
+
+  /*
+   * Submits the current command buffer in the Context to the GPU for execution.
+   */
+  void submit_current_cmd(const bool final_use = false);
+
+  /*
+   * Submits the current command buffer in the Context to the GPU for execution,
+   * and wait for it to complete before returning. This function will also flush
+   * the Context after execution.
+   */
+  void submit_current_cmd_and_wait(const bool final_use = false);
+
+ public:
   //
   // Graph Prepacking
   //
 
+  inline void update_staging_nbytes_in_cmd(const size_t staging_bytes) {
+    staging_nbytes_in_cmd_ += staging_bytes;
+  }
+
   void encode_prepack();
   void prepack() const;
 
+  /*
+   * Executes prepacking operations to transfer model weight data from the CPU
+   * to GPU.
+   */
+  void run_prepack();
+
   //
   // Graph Execution
   //

backends/vulkan/runtime/graph/GraphConfig.h

Lines changed: 14 additions & 0 deletions
@@ -36,6 +36,20 @@ struct GraphConfig final {
   // Whether or not the ComputeGraph should expect input shapes to be dynamic
   bool expect_dynamic_shapes;
 
+  // Execution properties that determine specifics re: how command buffer
+  // submission is handled, etc. 0 means this field is not set.
+
+  // During prepacking, once this threshold is reached, submit the current
+  // command buffer for execution. This allows the work to be distributed over
+  // multiple command buffer submissions, which can improve model load
+  // performance and prevent crashes when loading large models.
+  size_t prepack_threshold_nbytes = 0;
+  // Threshold used for the first command buffer submission during prepacking.
+  // This can be set lower than prepack_threshold_nbytes so that a command
+  // buffer is submitted for execution earlier, which can improve performance
+  // by taking more advantage of parallelism between the CPU and GPU.
+  size_t prepack_initial_threshold_nbytes = 0;
+
   vkapi::Adapter* external_adapter;
 
   // Generate a default graph config with pre-configured settings
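Since both new fields default to 0 and are backfilled to 10 MB in the ComputeGraph constructor, callers only need to touch them to tune submission behavior. A hedged sketch of doing so, assuming the usual pattern of building a ComputeGraph from a GraphConfig (the include path, the helper name, and the 2 MB / 6 MB values are illustrative, not part of this change):

#include <executorch/backends/vulkan/runtime/graph/ComputeGraph.h>

namespace vkcompute {

void load_with_custom_prepack_thresholds() {
  GraphConfig config;
  // Submit the first command buffer after ~2 MB of staged weights so GPU work
  // starts early, then after every ~6 MB once a submission has already happened.
  config.prepack_initial_threshold_nbytes = 2u * 1024u * 1024u;
  config.prepack_threshold_nbytes = 6u * 1024u * 1024u;

  ComputeGraph graph(config);
  // ... add values and ops to the graph here ...
  graph.prepare();
  graph.prepare_pipelines();
  graph.run_prepack();  // splits weight uploads according to the thresholds above
}

} // namespace vkcompute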

backends/vulkan/runtime/graph/containers/Constant.h

Lines changed: 4 additions & 0 deletions
@@ -28,6 +28,10 @@ struct TensorRef final {
       const std::vector<int64_t>& t_sizes,
       vkapi::ScalarType t_dtype,
       const void* const t_data);
+
+  inline size_t nbytes() const {
+    return utils::multiply_integers(sizes) * vkapi::element_size(dtype);
+  }
 };
 
 } // namespace vkcompute

backends/vulkan/runtime/graph/ops/PrepackNode.cpp

Lines changed: 1 addition & 0 deletions
@@ -62,6 +62,7 @@ api::StagingBuffer PrepackNode::create_staging_buffer(ComputeGraph* graph) {
   TensorRefPtr tref = graph->get_tref(tref_);
   size_t numel = utils::multiply_integers(tref->sizes);
   api::StagingBuffer staging(graph->context(), tref->dtype, numel);
+  graph->update_staging_nbytes_in_cmd(staging.buffer().mem_size_as_size_t());
   size_t nbytes = numel * vkapi::element_size(tref->dtype);
   staging.copy_from(tref->data, nbytes);
   return staging;

backends/vulkan/runtime/vk_api/memory/Buffer.h

Lines changed: 4 additions & 0 deletions
@@ -138,6 +138,10 @@ class VulkanBuffer final {
     return buffer_properties_.size;
   }
 
+  inline size_t mem_size_as_size_t() const {
+    return utils::safe_downcast<size_t>(mem_size());
+  }
+
   inline bool has_memory() const {
     return (memory_.allocation != VK_NULL_HANDLE);
   }
