Skip to content

Commit f465363

Browse files
committed
fix disagg test
Signed-off-by: Jhao-Ting Chen <[email protected]>
1 parent 7da2a8e commit f465363

File tree

1 file changed

+6
-4
lines changed

1 file changed

+6
-4
lines changed

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -358,7 +358,8 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
old  new
358  358    "max_batch_size": 1,
359  359    "cache_transceiver_config": {
360  360    "backend": "default"
361       - }
     361  + },
     362  + "cuda_graph_config": None,
362  363    }
363  364    gen_server_config = {
364  365    "disable_overlap_scheduler": not overlap_scheduler,
@@ -367,11 +368,12 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
old  new
367  368    "free_gpu_memory_fraction": 0.5,
368  369    "enable_block_reuse": False
369  370    },
370       - "max_num_tokens": 20, # BS * (draft token + 1)
371       - "max_batch_size": 4,
     371  + "max_num_tokens": 13393 * 2,
     372  + "max_batch_size": 16,
372  373    "cache_transceiver_config": {
373  374    "backend": "default"
374       - }
     375  + },
     376  + "cuda_graph_config": None,
375  377    }
376  378    disaggregated_server_config = {
377  379    "hostname": "localhost",

0 commit comments

Comments
 (0)