1 parent ff99639 commit d217052
tensorrt_llm/executor/result.py
@@ -228,6 +228,10 @@ def _handle_sequence(self,
             output.logprobs = response_tensors.log_probs[src_idx]
             # overcome some WAR in the cpp executor
             if finish_reasons[src_idx] != tllm.FinishReason.CANCELLED:
+                if len(output.logprobs) > output.length:
+                    # LlmResult holds a reference to LogProbStorage, which may be updated by the worker before the result is serialized.
+                    # Therefore, we treat extra logprobs/logits as expected and only consume what's needed.
+                    output.logprobs = output.logprobs[:output.length]
                 assert len(output.logprobs) == output.length
         if response_tensors.generation_logits is not None:
             output.generation_logits = response_tensors.generation_logits[
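To illustrate the behavior this change introduces, here is a minimal, standalone Python sketch. The names (CompletionOutputSketch, handle_logprobs, shared_logprobs) are hypothetical and are not the TensorRT-LLM API; they only model the situation described in the added comments: the result references a shared log-prob buffer that the worker may keep appending to, so any entries beyond output.length are trimmed before the length assertion.

# Minimal sketch (hypothetical, simplified names) of why the diff trims logprobs:
# the worker may append to a shared log-prob buffer after this result's snapshot
# was taken, so the buffer can be longer than the tokens the result covers.

from dataclasses import dataclass, field
from typing import List


@dataclass
class CompletionOutputSketch:
    token_ids: List[int] = field(default_factory=list)
    logprobs: List[float] = field(default_factory=list)

    @property
    def length(self) -> int:
        # Number of tokens this result actually covers.
        return len(self.token_ids)


def handle_logprobs(output: CompletionOutputSketch,
                    shared_logprobs: List[float]) -> None:
    # The result holds a *reference* to the shared storage, which may already
    # contain logprobs for tokens produced after this snapshot.
    output.logprobs = shared_logprobs
    if len(output.logprobs) > output.length:
        # Treat the extra entries as expected and keep only what this result needs.
        output.logprobs = output.logprobs[:output.length]
    assert len(output.logprobs) == output.length


output = CompletionOutputSketch(token_ids=[1, 2, 3])
storage = [-0.1, -0.2, -0.3, -0.4, -0.5]  # worker has already gone further ahead
handle_logprobs(output, storage)
print(output.logprobs)                    # [-0.1, -0.2, -0.3]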