@@ -79,11 +79,6 @@ class BenchmarkMetrics:
     std_e2el_ms: float
     percentiles_e2el_ms: list[tuple[float, float]]
     tput_user: list[float]
-    # Request accuracy rate metrics
-    mean_request_ar: float
-    median_request_ar: float
-    std_request_ar: float
-    percentiles_request_ar: list[tuple[float, float]]


 async def get_request(
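For orientation, the surviving fields are plain dataclass attributes; a trimmed sketch of `BenchmarkMetrics` as it stands after this hunk (the earlier TTFT/TPOT/ITL fields are elided for brevity):

    from dataclasses import dataclass

    @dataclass
    class BenchmarkMetrics:
        # ...TTFT/TPOT/ITL latency fields elided...
        std_e2el_ms: float
        percentiles_e2el_ms: list[tuple[float, float]]
        tput_user: list[float]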
@@ -136,7 +131,7 @@ def calculate_metrics(
     selected_percentile_metrics: list[str],
     selected_percentiles: list[float],
     goodput_config_dict: dict[str, float],
-) -> tuple[BenchmarkMetrics, list[int], list[float]]:
+) -> tuple[BenchmarkMetrics, list[int]]:
     actual_output_lens: list[int] = []
     total_input = 0
     completed = 0
@@ -147,7 +142,6 @@ def calculate_metrics(
     ttfts: list[float] = []
     e2els: list[float] = []
     tput_user: list[float] = []
-    request_ars: list[float] = []  # Request accuracy rates
     for i in range(len(outputs)):
         if outputs[i].success:
             output_len = outputs[i].output_tokens
@@ -173,24 +167,9 @@ def calculate_metrics(
             ttfts.append(outputs[i].ttft)
             e2els.append(outputs[i].latency)
             tput_user.append(output_len / (outputs[i].latency))
-
-            # Calculate request accuracy rate (num_generated_tokens / (decode_iteration + 1))
-            decode_iter = outputs[i].decode_iteration
-            if decode_iter >= 0:
-                # For generated tokens, we use output_len - 1 (excluding the first token if needed)
-                # But according to the reference, it should be num_generated_tokens
-                num_generated_tokens = max(0, output_len -
-                                           1) if output_len > 1 else output_len
-                request_ar = num_generated_tokens / (
-                    decode_iter + 1) if decode_iter >= 0 else 0.0
-                request_ars.append(request_ar)
-            else:
-                request_ars.append(0.0)
-
             completed += 1
         else:
             actual_output_lens.append(0)
-            request_ars.append(0.0)

     if goodput_config_dict:
         valid_metrics = []
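The block removed above computed a per-request accuracy rate as num_generated_tokens / (decode_iteration + 1), i.e. how many tokens each decode step yielded on average, which is relevant under speculative decoding. A minimal standalone sketch of the same computation; the helper name is illustrative, not part of the script:

    def request_accuracy_rate(output_len: int, decode_iteration: int) -> float:
        # Mirrors the removed logic: a negative decode_iteration means "no data".
        if decode_iteration < 0:
            return 0.0
        # Exclude the first token whenever more than one token was produced.
        num_generated_tokens = max(0, output_len - 1) if output_len > 1 else output_len
        return num_generated_tokens / (decode_iteration + 1)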
@@ -249,13 +228,8 @@ def calculate_metrics(
         percentiles_e2el_ms=[(p, np.percentile(e2els or 0, p) * 1000)
                              for p in selected_percentiles],
         tput_user=np.mean(tput_user or 0),
-        mean_request_ar=np.mean(request_ars or 0),
-        median_request_ar=np.median(request_ars or 0),
-        std_request_ar=np.std(request_ars or 0),
-        percentiles_request_ar=[(p, np.percentile(request_ars or 0, p))
-                                for p in selected_percentiles],
     )
-    return metrics, actual_output_lens, request_ars
+    return metrics, actual_output_lens


 async def benchmark(
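As above, each percentiles_* field stores (percentile, value) pairs. A small worked example of that pattern, with made-up latencies:

    import numpy as np

    e2els = [0.84, 1.02, 1.31]           # per-request end-to-end latencies (s)
    selected_percentiles = [50.0, 99.0]
    percentiles_e2el_ms = [(p, np.percentile(e2els or 0, p) * 1000)
                           for p in selected_percentiles]
    # -> [(50.0, 1020.0), (99.0, ~1304.2)], in ms, using numpy's default
    #    linear interpolation between sorted sample points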
@@ -429,7 +403,7 @@ async def limited_request_func(request_func_input, streaming, pbar,
     # Close the session
     await session.close()

-    metrics, actual_output_lens, request_ars = calculate_metrics(
+    metrics, actual_output_lens = calculate_metrics(
         input_requests=input_requests,
         outputs=outputs,
         dur_s=benchmark_duration,
@@ -457,10 +431,6 @@ async def limited_request_func(request_func_input, streaming, pbar,
                                     metrics.total_token_throughput))
     print("{:<40} {:<10.2f}".format("User throughput (tok/s):",
                                     metrics.tput_user))
-    print("{:<40} {:<10.4f}".format("Mean Request AR:",
-                                    metrics.mean_request_ar))
-    print("{:<40} {:<10.4f}".format("Median Request AR:",
-                                    metrics.median_request_ar))

     result = {
         "duration": benchmark_duration,
@@ -473,17 +443,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
         "output_throughput": metrics.output_throughput,
         "total_token_throughput": metrics.total_token_throughput,
         "user_throughput": metrics.tput_user,
-        "mean_request_ar": metrics.mean_request_ar,
-        "median_request_ar": metrics.median_request_ar,
-        "std_request_ar": metrics.std_request_ar,
         "input_lens": [output.prompt_len for output in outputs],
         "output_lens": actual_output_lens,
         "ttfts": [output.ttft for output in outputs],
         "itls": [output.itl for output in outputs],
         "generated_texts": [output.generated_text for output in outputs],
         "errors": [output.error for output in outputs],
-        "request_ars": request_ars,
-        "decode_iterations": [output.decode_iteration for output in outputs],
     }

     def process_one_metric(
@@ -569,15 +534,11 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     metrics = [
         "median_ttft_ms", "mean_ttft_ms", "std_ttft_ms", "p99_ttft_ms",
         "mean_tpot_ms", "median_tpot_ms", "std_tpot_ms", "p99_tpot_ms",
-        "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms",
-        "mean_request_ar", "median_request_ar", "std_request_ar"
+        "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms"
     ]
     # These raw data might be useful, but they are rather big. They can be added
     # later if needed
-    ignored_metrics = [
-        "ttfts", "itls", "generated_texts", "errors", "request_ars",
-        "decode_iterations"
-    ]
+    ignored_metrics = ["ttfts", "itls", "generated_texts", "errors"]
     pt_records = convert_to_pytorch_benchmark_format(
         args=args,
         metrics={k: [results[k]]
@@ -801,8 +762,7 @@ def main(args: argparse.Namespace):
         # Remove fields with too many data points
         for field in [
                 "input_lens", "output_lens", "ttfts", "itls",
-                "generated_texts", "errors", "request_ars",
-                "decode_iterations"
+                "generated_texts", "errors"
         ]:
             if field in result_json:
                 del result_json[field]
@@ -1003,11 +963,11 @@ def main(args: argparse.Namespace):
     parser.add_argument(
         "--percentile-metrics",
         type=str,
-        default="ttft,tpot,itl,request_ar",
+        default="ttft,tpot,itl",
        help="Comma-separated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
-        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\", \"request_ar\". "
-        "Default value is \"ttft,tpot,itl,request_ar\".")
+        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
+        "Default value is \"ttft,tpot,itl\".")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
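Downstream, the script splits this comma-separated flag and validates each name against the allowed set; a hedged sketch of that pattern (illustrative code, not the script's literal parsing):

    ALLOWED_PERCENTILE_METRICS = {"ttft", "tpot", "itl", "e2el"}

    def parse_percentile_metrics(value: str) -> list[str]:
        names = [m.strip() for m in value.split(",") if m.strip()]
        unknown = set(names) - ALLOWED_PERCENTILE_METRICS
        if unknown:
            raise ValueError(f"Unsupported percentile metrics: {sorted(unknown)}")
        return names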