@@ -2044,7 +2044,8 @@ def _prepare_input_ids(self, total_num_scheduled_tokens: int, cu_num_tokens: np.
        indices_match = True
        max_flattened_index = -1
        for req_id, cur_index in self.input_batch.req_id_to_index.items():
-            if req_id in self.input_batch.prev_sampled_token_ids_invalid_indices:
+            if (self.input_batch.prev_sampled_token_ids_invalid_indices is not None
+                    and req_id in self.input_batch.prev_sampled_token_ids_invalid_indices):
                # This request was in the previous batch but its
                # prev_sampled_token_ids is invalid
                continue
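A minimal standalone sketch of the guard this hunk adds, with illustrative names and data rather than the runner's real state: membership is only tested after confirming that the container of invalid indices is not None.

```python
# Illustrative only: mirrors the fixed condition in _prepare_input_ids.
prev_sampled_token_ids_invalid_indices = None  # may be None or a set of request ids
req_id_to_index = {"req-a": 0, "req-b": 1}

for req_id, cur_index in req_id_to_index.items():
    # Without the "is not None" check, `req_id in None` raises TypeError.
    if (prev_sampled_token_ids_invalid_indices is not None
            and req_id in prev_sampled_token_ids_invalid_indices):
        # Skip requests whose previous sampled tokens are invalid.
        continue
    ...
```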
@@ -2847,6 +2848,8 @@ def execute_model(
        req_id_to_index_output_copy = \
            self.input_batch.req_id_to_index.copy()

+        max_req_index = max(self.input_batch.req_id_to_index.values())
+        postprocessed_sampled_token_ids: list[list[int]] = [[] for _ in range(max_req_index + 1)]
        if self.use_async_scheduling:
            assert not self.speculative_config, "Speculative decoding not supported with async scheduling"
            self.input_batch.prev_sampled_token_ids = \
@@ -2859,11 +2862,7 @@ def execute_model(
                req_id: i
                for i, req_id in enumerate(self.input_batch.req_ids) if i not in invalid_req_indices_set
            }
-
-            # For the output, create placeholder sampled_token_ids
-            # (will be filled during serialization)
-            max_req_index = max(self.input_batch.req_id_to_index.values())
-            postprocessed_sampled_token_ids = [[] for _ in range(max_req_index + 1)]
+            # For the output, postprocessed_sampled_token_ids will be filled during serialization
        else:
            # From this point onward, all operations are done on CPU.
            # We already have tokens. Let's copy the data to
@@ -2874,9 +2873,6 @@ def execute_model(
            sampled_token_ids_list = torch.cat(decode_sampled_token_ids + prefill_sampled_token_ids).tolist()
            sampled_token_requests = \
                decode_sampled_requests + prefill_sampled_requests
-            max_req_index = max(self.input_batch.req_id_to_index.values())
-            postprocessed_sampled_token_ids: list[list]
-            postprocessed_sampled_token_ids = [[] for _ in range(max_req_index + 1)]
            # NOTE(Chendi): in post-processing, spec_decode might
            # return more than 1 token during decode.
            start_idx = 0
@@ -2928,12 +2924,10 @@ def execute_model(
                req_state.output_token_ids.extend(sampled_ids)

            # Create output.
-            all_req_ids = pd_info.decode_req_ids + pd_info.prompt_req_ids
            # prompt_logprobs_dict: dict[
            #     str, Optional[LogprobsTensors]] = self._get_prompt_logprobs_dict(
            #     prefill_hidden_states_device, scheduler_output)
            prompt_logprobs_dict: dict[str, Optional[LogprobsTensors]] = {}
-            all_req_ids = pd_info.decode_req_ids + pd_info.prompt_req_ids
            logprobs = None

            model_runner_output = ModelRunnerOutput(
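The later hunks hoist the placeholder allocation above the `if self.use_async_scheduling:` branch so both paths share one pre-allocated list. A minimal sketch of that pattern with made-up data; only the shape of the logic mirrors the diff, the sampler output below is fabricated for illustration.

```python
req_id_to_index = {"req-a": 0, "req-b": 1, "req-c": 2}

# Pre-allocate one (possibly empty) token list per request slot once,
# instead of repeating the allocation in each branch.
max_req_index = max(req_id_to_index.values())
postprocessed_sampled_token_ids: list[list[int]] = [
    [] for _ in range(max_req_index + 1)
]

# Later, slots are filled by request index; unfilled slots stay empty lists.
sampled = {"req-a": [101], "req-c": [7, 8]}  # fake sampler output
for req_id, token_ids in sampled.items():
    postprocessed_sampled_token_ids[req_id_to_index[req_id]].extend(token_ids)

print(postprocessed_sampled_token_ids)  # [[101], [], [7, 8]]
```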