@@ -79,11 +79,6 @@ class BenchmarkMetrics:
     std_e2el_ms: float
     percentiles_e2el_ms: list[tuple[float, float]]
     tput_user: list[float]
-    # Request accuracy rate metrics
-    mean_request_ar: float
-    median_request_ar: float
-    std_request_ar: float
-    percentiles_request_ar: list[tuple[float, float]]
 
 
 async def get_request(
@@ -136,7 +131,7 @@ def calculate_metrics(
     selected_percentile_metrics: list[str],
     selected_percentiles: list[float],
     goodput_config_dict: dict[str, float],
-) -> tuple[BenchmarkMetrics, list[int], list[float]]:
+) -> tuple[BenchmarkMetrics, list[int]]:
     actual_output_lens: list[int] = []
     total_input = 0
     completed = 0
@@ -147,7 +142,6 @@ def calculate_metrics(
     ttfts: list[float] = []
     e2els: list[float] = []
     tput_user: list[float] = []
-    request_ars: list[float] = []  # Request accuracy rates
     for i in range(len(outputs)):
         if outputs[i].success:
             output_len = outputs[i].output_tokens
@@ -173,24 +167,9 @@ def calculate_metrics(
             ttfts.append(outputs[i].ttft)
             e2els.append(outputs[i].latency)
             tput_user.append(output_len / (outputs[i].latency))
-
-            # Calculate request accuracy rate (num_generated_tokens / (decode_iteration + 1))
-            decode_iter = outputs[i].decode_iteration
-            if decode_iter >= 0:
-                # For generated tokens, we use output_len - 1 (excluding the first token if needed)
-                # But according to the reference, it should be num_generated_tokens
-                num_generated_tokens = max(0, output_len -
-                                           1) if output_len > 1 else output_len
-                request_ar = num_generated_tokens / (
-                    decode_iter + 1) if decode_iter >= 0 else 0.0
-                request_ars.append(request_ar)
-            else:
-                request_ars.append(0.0)
-
             completed += 1
         else:
             actual_output_lens.append(0)
-            request_ars.append(0.0)
 
     if goodput_config_dict:
         valid_metrics = []
@@ -249,13 +228,8 @@ def calculate_metrics(
         percentiles_e2el_ms=[(p, np.percentile(e2els or 0, p) * 1000)
                              for p in selected_percentiles],
         tput_user=np.mean(tput_user or 0),
-        mean_request_ar=np.mean(request_ars or 0),
-        median_request_ar=np.median(request_ars or 0),
-        std_request_ar=np.std(request_ars or 0),
-        percentiles_request_ar=[(p, np.percentile(request_ars or 0, p))
-                                for p in selected_percentiles],
     )
-    return metrics, actual_output_lens, request_ars
+    return metrics, actual_output_lens
 
 
 async def benchmark(
@@ -439,7 +413,7 @@ async def limited_request_func(request_func_input, streaming, pbar,
     # Close the session
     await session.close()
 
-    metrics, actual_output_lens, request_ars = calculate_metrics(
+    metrics, actual_output_lens = calculate_metrics(
         input_requests=input_requests,
         outputs=outputs,
         dur_s=benchmark_duration,
@@ -467,10 +441,6 @@ async def limited_request_func(request_func_input, streaming, pbar,
                                     metrics.total_token_throughput))
     print("{:<40} {:<10.2f}".format("User throughput (tok/s):",
                                     metrics.tput_user))
-    print("{:<40} {:<10.4f}".format("Mean Request AR:",
-                                    metrics.mean_request_ar))
-    print("{:<40} {:<10.4f}".format("Median Request AR:",
-                                    metrics.median_request_ar))
 
     result = {
         "duration": benchmark_duration,
@@ -483,17 +453,12 @@ async def limited_request_func(request_func_input, streaming, pbar,
         "output_throughput": metrics.output_throughput,
         "total_token_throughput": metrics.total_token_throughput,
         "user_throughput": metrics.tput_user,
-        "mean_request_ar": metrics.mean_request_ar,
-        "median_request_ar": metrics.median_request_ar,
-        "std_request_ar": metrics.std_request_ar,
         "input_lens": [output.prompt_len for output in outputs],
         "output_lens": actual_output_lens,
         "ttfts": [output.ttft for output in outputs],
         "itls": [output.itl for output in outputs],
         "generated_texts": [output.generated_text for output in outputs],
         "errors": [output.error for output in outputs],
-        "request_ars": request_ars,
-        "decode_iterations": [output.decode_iteration for output in outputs],
     }
 
     def process_one_metric(
@@ -579,15 +544,11 @@ def save_to_pytorch_benchmark_format(args: argparse.Namespace,
     metrics = [
         "median_ttft_ms", "mean_ttft_ms", "std_ttft_ms", "p99_ttft_ms",
         "mean_tpot_ms", "median_tpot_ms", "std_tpot_ms", "p99_tpot_ms",
-        "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms",
-        "mean_request_ar", "median_request_ar", "std_request_ar"
+        "median_itl_ms", "mean_itl_ms", "std_itl_ms", "p99_itl_ms"
     ]
     # These raw data might be useful, but they are rather big. They can be added
     # later if needed
-    ignored_metrics = [
-        "ttfts", "itls", "generated_texts", "errors", "request_ars",
-        "decode_iterations"
-    ]
+    ignored_metrics = ["ttfts", "itls", "generated_texts", "errors"]
     pt_records = convert_to_pytorch_benchmark_format(
         args=args,
         metrics={k: [results[k]]
@@ -858,8 +819,7 @@ def create_dataset_and_sample(dataset_name: str):
         # Remove fields with too many data points
         for field in [
                 "input_lens", "output_lens", "ttfts", "itls",
-                "generated_texts", "errors", "request_ars",
-                "decode_iterations"
+                "generated_texts", "errors"
         ]:
             if field in result_json:
                 del result_json[field]
@@ -1061,11 +1021,11 @@ def create_dataset_and_sample(dataset_name: str):
     parser.add_argument(
         "--percentile-metrics",
         type=str,
-        default="ttft,tpot,itl,request_ar",
+        default="ttft,tpot,itl",
         help="Comma-separated list of selected metrics to report percentils. "
         "This argument specifies the metrics to report percentiles. "
-        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\", \"request_ar\". "
-        "Default value is \"ttft,tpot,itl,request_ar\".")
+        "Allowed metric names are \"ttft\", \"tpot\", \"itl\", \"e2el\". "
+        "Default value is \"ttft,tpot,itl\".")
     parser.add_argument(
         "--metric-percentiles",
         type=str,
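For reference, the request AR metric removed by this diff boiled down to tokens generated per decode iteration. Below is a minimal standalone sketch of the deleted computation, assuming `output_len` and `decode_iteration` carry the same meaning as the fields used in the diff; the helper name and the example values are illustrative, not part of the change:

    # Sketch of the removed per-request "accuracy rate": tokens generated per
    # decode iteration, reconstructed from the deleted lines above.
    def request_accuracy_rate(output_len: int, decode_iteration: int) -> float:
        if decode_iteration < 0:
            return 0.0  # the deleted code scored requests without decode info as 0
        # The deleted code counted output_len - 1 generated tokens when more
        # than one token was produced, else output_len.
        num_generated_tokens = output_len - 1 if output_len > 1 else output_len
        return num_generated_tokens / (decode_iteration + 1)

    assert request_accuracy_rate(1, 0) == 1.0   # one token in one iteration
    assert request_accuracy_rate(10, 4) == 1.8  # 9 generated tokens over 5 iterations
    assert request_accuracy_rate(5, -1) == 0.0  # failed/unknown requests score 0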