1 parent ff99639 commit d217052
tensorrt_llm/executor/result.py
@@ -228,6 +228,10 @@ def _handle_sequence(self,
             output.logprobs = response_tensors.log_probs[src_idx]
             # overcome some WAR in the cpp executor
             if finish_reasons[src_idx] != tllm.FinishReason.CANCELLED:
+                if len(output.logprobs) > output.length:
+                    # LlmResult holds a reference to LogProbStorage, which may be updated by the worker before the result is serialized.
+                    # Therefore, we treat extra logprobs/logits as expected and only consume what's needed.
+                    output.logprobs = output.logprobs[:output.length]
                 assert len(output.logprobs) == output.length
         if response_tensors.generation_logits is not None:
             output.generation_logits = response_tensors.generation_logits[
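To illustrate the behavior this change introduces, here is a minimal, standalone Python sketch. The names (CompletionOutputSketch, handle_logprobs, shared_logprobs) are hypothetical and are not the TensorRT-LLM API; they only model the situation described in the added comments: the result references a shared log-prob buffer that the worker may keep appending to, so any entries beyond output.length are trimmed before the length assertion.

# Minimal sketch (hypothetical, simplified names) of why the diff trims logprobs:
# the worker may append to a shared log-prob buffer after this result's snapshot
# was taken, so the buffer can be longer than the tokens the result covers.

from dataclasses import dataclass, field
from typing import List


@dataclass
class CompletionOutputSketch:
    token_ids: List[int] = field(default_factory=list)
    logprobs: List[float] = field(default_factory=list)

    @property
    def length(self) -> int:
        # Number of tokens this result actually covers.
        return len(self.token_ids)


def handle_logprobs(output: CompletionOutputSketch,
                    shared_logprobs: List[float]) -> None:
    # The result holds a *reference* to the shared storage, which may already
    # contain logprobs for tokens produced after this snapshot.
    output.logprobs = shared_logprobs
    if len(output.logprobs) > output.length:
        # Treat the extra entries as expected and keep only what this result needs.
        output.logprobs = output.logprobs[:output.length]
    assert len(output.logprobs) == output.length


output = CompletionOutputSketch(token_ids=[1, 2, 3])
storage = [-0.1, -0.2, -0.3, -0.4, -0.5]  # worker has already gone further ahead
handle_logprobs(output, storage)
print(output.logprobs)                    # [-0.1, -0.2, -0.3]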