Workaround (WAR) to unblock trtllm-serve with logprobs in the PyTorch backend

hchings · hchings · commit a8640c694906 · 2025-07-21T16:53:24.000-07:00
Signed-off-by: Erin Ho <14718778+hchings@users.noreply.github.com>
diff --git a/tensorrt_llm/executor/result.py b/tensorrt_llm/executor/result.py
@@ -228,6 +228,9 @@ def _handle_sequence(self,
             output.logprobs = response_tensors.log_probs[src_idx]
             # overcome some WAR in the cpp executor
             if finish_reasons[src_idx] != tllm.FinishReason.CANCELLED:
+                if len(output.logprobs) > output.length:
+                    # WAR [nvbug 5398806]
+                    output.logprobs = output.logprobs[:output.length]
                 assert len(output.logprobs) == output.length
         if response_tensors.generation_logits is not None:
             output.generation_logits = response_tensors.generation_logits[