@@ -180,23 +180,23 @@ def latency_command(
180
180
logger .info ("Preparing to run latency benchmark..." )
181
181
# Parameters from CLI
182
182
# Model, experiment, and engine params
183
- dataset_path : Path = params .pop ("dataset" )
184
- num_requests : int = params .pop ("num_requests" )
183
+ dataset_path : Path = params .get ("dataset" )
184
+ num_requests : int = params .get ("num_requests" )
185
185
model : str = bench_env .model
186
186
checkpoint_path : Path = bench_env .checkpoint_path or bench_env .model
187
- engine_dir : Path = params .pop ("engine_dir" )
188
- concurrency : int = params .pop ("concurrency" )
189
- beam_width : int = params .pop ("beam_width" )
187
+ engine_dir : Path = params .get ("engine_dir" )
188
+ concurrency : int = params .get ("concurrency" )
189
+ beam_width : int = params .get ("beam_width" )
190
190
warmup : int = params .get ("warmup" )
191
- modality : str = params .pop ("modality" )
192
- max_input_len : int = params .pop ("max_input_len" )
193
- max_seq_len : int = params .pop ("max_seq_len" )
191
+ modality : str = params .get ("modality" )
192
+ max_input_len : int = params .get ("max_input_len" )
193
+ max_seq_len : int = params .get ("max_seq_len" )
194
194
backend : str = params .get ("backend" )
195
195
model_type = get_model_config (model , checkpoint_path ).model_type
196
196
197
197
# Runtime Options
198
- kv_cache_percent = params .pop ("kv_cache_free_gpu_mem_fraction" )
199
- medusa_choices = params .pop ("medusa_choices" )
198
+ kv_cache_percent = params .get ("kv_cache_free_gpu_mem_fraction" )
199
+ medusa_choices = params .get ("medusa_choices" )
200
200
201
201
# Reporting Options
202
202
report_json : Path = params .pop ("report_json" )
0 commit comments