File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
tensorrt_llm/bench/dataclasses Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -119,9 +119,9 @@ def generate_statistics_summary(self, max_draft_tokens: int) -> None:
119
119
# For speculative decoding, we need to track the number of draft tokens per request and the number of accepted draft tokens per request
120
120
if max_draft_tokens > 0 :
121
121
num_draft_tokens .append (max_draft_tokens * (entry .decode_iteration + 1 ))
122
- num_accepted_draft_tokens .append (entry .num_generated_tokens - entry .decode_iteration - 1 )
122
+ num_accepted_draft_tokens .append (entry .num_total_output_tokens - entry .decode_iteration - 1 )
123
123
draft_acceptance_rate .append (float (num_accepted_draft_tokens [- 1 ]) / float (num_draft_tokens [- 1 ]))
124
- acceptance_length .append (entry .num_generated_tokens / (entry .decode_iteration +
124
+ acceptance_length .append (entry .num_total_output_tokens / (entry .decode_iteration +
125
125
1 ))
126
126
127
127
global_acceptance_length = sum (output_tokens ) / total_decoding_iterations
You can’t perform that action at this time.
0 commit comments