Skip to content

Commit 7ae33b0

Browse files
committed
fix
1 parent b8ba61d commit 7ae33b0

File tree

1 file changed

+2
-2
lines changed

1 file changed

+2
-2
lines changed

tensorrt_llm/bench/dataclasses/reporting.py

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -119,9 +119,9 @@ def generate_statistics_summary(self, max_draft_tokens: int) -> None:
119119
# For speculative decoding, we need to track the number of draft tokens per request and the number of accepted draft tokens per request
120120
if max_draft_tokens > 0:
121121
num_draft_tokens.append(max_draft_tokens * (entry.decode_iteration + 1))
122-
num_accepted_draft_tokens.append(entry.num_generated_tokens - entry.decode_iteration - 1)
122+
num_accepted_draft_tokens.append(entry.num_total_output_tokens - entry.decode_iteration - 1)
123123
draft_acceptance_rate.append(float(num_accepted_draft_tokens[-1]) / float(num_draft_tokens[-1]))
124-
acceptance_length.append(entry.num_generated_tokens / (entry.decode_iteration +
124+
acceptance_length.append(entry.num_total_output_tokens / (entry.decode_iteration +
125125
1))
126126

127127
global_acceptance_length = sum(output_tokens) / total_decoding_iterations

0 commit comments

Comments
 (0)