Commit 97e70d3

This change separates the general DP padding logic from the existing TorchAir-specific implementation. It makes sure the server does not hang when the batch size is smaller than the DP size.
Signed-off-by: Yizhou Liu <[email protected]>
1 parent 3f867ee commit 97e70d3

File tree

1 file changed: 4 additions, 0 deletions


vllm_ascend/worker/model_runner_v1.py

Lines changed: 4 additions & 0 deletions
@@ -1930,6 +1930,10 @@ def _dummy_run(
         )
 
         # Padding for DP
+        num_pad, num_tokens_across_dp_native = self.get_dp_padding(num_tokens)
+        # num_tokens += num_pad ## Uncomment this after TorchAir is removed
+
+        # Padding for DP (for TorchAir)
         (num_tokens, num_tokens_across_dp, with_prefill,
          _) = self._get_forward_metadata_across_dp_and_pad(
              num_tokens, with_prefill, False)
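
For context, the body of get_dp_padding is not part of this diff. The sketch below is only an illustration of what general DP padding has to compute, assuming an initialized torch.distributed data-parallel process group; dp_group, dp_size, and dp_padding_sketch are placeholder names, not identifiers from this commit. The point is the one the commit message makes: every DP rank must be padded up to a common token count, otherwise ranks that receive no requests (batch size < DP size) stop issuing collectives while the remaining ranks wait on them.

```python
# Illustrative sketch only -- not the vllm_ascend implementation.
# Assumes a torch.distributed process group for data parallelism is set up.
import torch
import torch.distributed as dist


def dp_padding_sketch(num_tokens: int, dp_group: dist.ProcessGroup,
                      dp_size: int) -> tuple[int, torch.Tensor]:
    """Return (num_pad, num_tokens_across_dp) for the local DP rank.

    Gathers every rank's token count, then pads the local count up to the
    maximum so all ranks execute the same number of (dummy) forward steps.
    """
    local = torch.tensor([num_tokens], dtype=torch.int32)
    # Collect per-rank token counts; ranks with an empty batch report 0.
    gathered = [torch.zeros(1, dtype=torch.int32) for _ in range(dp_size)]
    dist.all_gather(gathered, local, group=dp_group)
    num_tokens_across_dp = torch.cat(gathered)

    # Pad the local rank up to the max so no rank skips the forward pass.
    max_tokens = int(num_tokens_across_dp.max().item())
    num_pad = max_tokens - num_tokens
    return num_pad, num_tokens_across_dp
```

The commented-out `num_tokens += num_pad` line in the diff marks where this padding would actually be applied once the TorchAir-specific path is removed.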
