[ET-VK][ez] Rename run_prepack() to prepack() and replace encode_prepack() + prepack() with just prepack() (#12534)

pytorchbot · lucylq · commit 9e6855358de8 · 2025-07-17T10:31:32.000-07:00
The title says it all! See the diff below for more context on why this new API exists. Differential Revision: [D78275583](https://our.internmc.facebook.com/intern/diff/D78275583/)
diff --git a/backends/vulkan/runtime/VulkanBackend.cpp b/backends/vulkan/runtime/VulkanBackend.cpp
@@ -507,7 +507,7 @@ class VulkanBackend final : public ::executorch::runtime::BackendInterface {
     compute_graph->prepare();
     compute_graph->prepare_pipelines();
 
-    compute_graph->run_prepack();
+    compute_graph->prepack();
 
     // If dynamic shapes are not expected, then the command buffer only needs to
     // be encoded once. Otherwise, wait until the first inference to encode the
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.cpp b/backends/vulkan/runtime/graph/ComputeGraph.cpp
@@ -769,23 +769,7 @@ void ComputeGraph::submit_current_cmd_and_wait(const bool final_use) {
   context_->flush();
 }
 
-void ComputeGraph::encode_prepack() {
-  for (std::unique_ptr<PrepackNode>& node : prepack_nodes_) {
-    node->encode(this);
-  }
-}
-
-void ComputeGraph::prepack() const {
-  // Submit and execute the command buffer
-  vkapi::VulkanFence fence = context_->fences().get_fence();
-  context_->submit_cmd_to_gpu(fence.get_submit_handle(), /*final_use = */ true);
-  fence.wait();
-  context_->fences().return_fence(fence);
-
-  context_->flush();
-}
-
-void ComputeGraph::run_prepack() {
+void ComputeGraph::prepack() {
   int i = 0;
   bool submitted = false;
   const bool reduce_peak_memory = total_constant_nbytes_ > 500 * MB;
diff --git a/backends/vulkan/runtime/graph/ComputeGraph.h b/backends/vulkan/runtime/graph/ComputeGraph.h
@@ -861,14 +861,11 @@ class ComputeGraph final {
     staging_nbytes_in_cmd_ += staging_bytes;
   }
 
-  void encode_prepack();
-  void prepack() const;
-
   /*
    * Executes prepacking operations to transfer model weight data from the CPU
    * to GPU.
    */
-  void run_prepack();
+  void prepack();
 
   //
   // Graph Execution
diff --git a/backends/vulkan/test/op_tests/choose_qparams_test.cpp b/backends/vulkan/test/op_tests/choose_qparams_test.cpp
@@ -456,7 +456,7 @@ void test_vulkan_choose_qparams_tensor_impl(
   ValueRef staging_zero_point = graph.set_output_tensor(r_zero_point);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -676,7 +676,7 @@ void test_vulkan_choose_qparams_per_token_asymmetric_impl(
   ValueRef staging_zero_point = graph.set_output_tensor(r_zero_point);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/dequantize_test.cpp b/backends/vulkan/test/op_tests/dequantize_test.cpp
@@ -1138,7 +1138,7 @@ void test_vulkan_dequantize_per_token_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -1670,7 +1670,6 @@ void test_vulkan_dequantize_per_channel_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
   graph.prepack();
   graph.encode_execute();
 
@@ -2345,7 +2344,6 @@ void test_vulkan_dequantize_per_tensor_tensor_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/quantize_test.cpp b/backends/vulkan/test/op_tests/quantize_test.cpp
@@ -929,7 +929,7 @@ void test_vulkan_quantize_per_token_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -1412,7 +1412,6 @@ void test_vulkan_quantize_per_channel_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
   graph.prepack();
   graph.encode_execute();
 
@@ -2042,7 +2041,6 @@ void test_vulkan_quantize_per_tensor_tensor_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/quantized_linear_test.cpp b/backends/vulkan/test/op_tests/quantized_linear_test.cpp
@@ -454,7 +454,7 @@ void test_vulkan_linear_qga4w_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -549,7 +549,7 @@ void test_vulkan_linear_qcs4w_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -683,7 +683,7 @@ void test_vulkan_linear_qta8a_qga4w_impl(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/rotary_embedding_test.cpp b/backends/vulkan/test/op_tests/rotary_embedding_test.cpp
@@ -112,7 +112,7 @@ void test_reference(
   ValueRef staging_xk_out = graph.set_output_tensor(r_xk_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/sdpa_test.cpp b/backends/vulkan/test/op_tests/sdpa_test.cpp
@@ -350,7 +350,7 @@ void test_vulkan_sdpa(
   ValueRef staging_out = graph.set_output_tensor(r_out);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
diff --git a/backends/vulkan/test/op_tests/utils/gen_computegraph.py b/backends/vulkan/test/op_tests/utils/gen_computegraph.py
@@ -681,7 +681,6 @@ def gen_graph_build_code(self, include_declarations: bool = True) -> str:
             graph_build += self.set_output(self.refs["out"], include_declarations)
 
         graph_build += f"{self.graph}{self.dot}prepare();\n"
-        graph_build += f"{self.graph}{self.dot}encode_prepack();\n"
         graph_build += f"{self.graph}{self.dot}prepack();\n"
         graph_build += f"{self.graph}{self.dot}encode_execute();\n"
 
diff --git a/backends/vulkan/test/vulkan_compute_api_test.cpp b/backends/vulkan/test/vulkan_compute_api_test.cpp
@@ -1435,7 +1435,6 @@ TEST(VulkanComputeGraphTest, test_simple_prepacked_graph) {
 
   graph.prepare();
 
-  graph.encode_prepack();
   graph.prepack();
 
   graph.encode_execute();
@@ -2568,7 +2567,7 @@ void test_binary_op(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -2641,7 +2640,7 @@ void test_mm(
       B, M, K, N, dtype, storage_type, memory_layout, mat2_data, prepack);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
 
   for (int i = 1; i < 4; i++) {
@@ -2722,7 +2721,7 @@ void test_mm_with_resize_reencode(
       B, M, K, N, dtype, storage_type, memory_layout, mat2_data, false);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -2800,7 +2799,7 @@ void test_max_pool2d(
   idx_ioval.staging = graph.set_output_tensor(idx_ioval.value);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -2879,7 +2878,7 @@ void test_grid_priors(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
 
@@ -2983,7 +2982,7 @@ void test_transpose_view_mm(
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
 
   for (int i = 1; i < 4; i++) {
@@ -3049,7 +3048,7 @@ void test_to_copy() {
   out.staging = graph.set_output_tensor(out.value);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();
   graph.propagate_resize();
@@ -3236,7 +3235,7 @@ void test_dynamic_dispatch(int M, int N) {
   ComputeGraph graph = build_dynamic_dispatch_test_graph(M, N);
 
   graph.prepare();
-  graph.encode_prepack();
+
   graph.prepack();
   graph.encode_execute();