Skip to content

Commit bbebac2

Browse files
committed
fix accuracy tests
Signed-off-by: Bo Deng <[email protected]>
1 parent: d640d0b · commit: bbebac2

File tree

5 files changed: 54 additions (+54) and 34 deletions (-34).

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 50 additions & 30 deletions
Original file line number | Diff line number | Diff line change
@@ -281,30 +281,6 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
281281
MODEL_NAME = "meta-llama/Llama-3.1-8B-Instruct"
282282
MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
283283

284-
def test_nixl_backend(self):
285-
ctx_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
286-
gen_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
287-
disaggregated_server_config = {
288-
"hostname": "localhost",
289-
"port": 8000,
290-
"backend": "pytorch",
291-
"context_servers": {
292-
"num_instances": 1,
293-
"urls": ["localhost:8001"]
294-
},
295-
"generation_servers": {
296-
"num_instances": 1,
297-
"urls": ["localhost:8002"]
298-
}
299-
}
300-
with launch_disaggregated_llm(disaggregated_server_config,
301-
ctx_server_config, gen_server_config,
302-
self.MODEL_PATH) as llm:
303-
task = MMLU(self.MODEL_NAME)
304-
task.evaluate(llm)
305-
task = GSM8K(self.MODEL_NAME)
306-
task.evaluate(llm)
307-
308284
@pytest.mark.skip_less_device_memory(32000)
309285
@pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
310286
def test_auto_dtype(self, disable_overlap_scheduler):
@@ -590,8 +566,18 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
590566
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"
591567

592568
def test_nixl_backend(self):
593-
ctx_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
594-
gen_server_config = {"cache_transceiver_config": {"backend": "nixl"}}
569+
ctx_server_config = {
570+
"disable_overlap_scheduler": True,
571+
"cache_transceiver_config": {
572+
"backend": "nixl"
573+
}
574+
}
575+
gen_server_config = {
576+
"disable_overlap_scheduler": True,
577+
"cache_transceiver_config": {
578+
"backend": "nixl"
579+
}
580+
}
595581
disaggregated_server_config = {
596582
"hostname": "localhost",
597583
"port": 8000,
@@ -606,10 +592,8 @@ def test_nixl_backend(self):
606592
}
607593
}
608594
with launch_disaggregated_llm(disaggregated_server_config,
609-
ctx_server_config,
610-
gen_server_config,
611-
self.MODEL_PATH,
612-
tensor_parallel_size=4) as llm:
595+
ctx_server_config, gen_server_config,
596+
self.MODEL_PATH) as llm:
613597
task = MMLU(self.MODEL_NAME)
614598
task.evaluate(llm)
615599
task = GSM8K(self.MODEL_NAME)
@@ -716,6 +700,42 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
716700
MODEL_NAME = "Qwen3/Qwen3-8B"
717701
MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"
718702

703+
def test_nixl_backend(self):
704+
ctx_server_config = {
705+
"disable_overlap_scheduler": True,
706+
"cache_transceiver_config": {
707+
"backend": "nixl"
708+
}
709+
}
710+
gen_server_config = {
711+
"disable_overlap_scheduler": True,
712+
"cache_transceiver_config": {
713+
"backend": "nixl"
714+
}
715+
}
716+
ctx_server_config["cache_transceiver_config"]
717+
ctx_server_config["cache_transceiver_config"]
718+
disaggregated_server_config = {
719+
"hostname": "localhost",
720+
"port": 8000,
721+
"backend": "pytorch",
722+
"context_servers": {
723+
"num_instances": 1,
724+
"urls": ["localhost:8001"]
725+
},
726+
"generation_servers": {
727+
"num_instances": 1,
728+
"urls": ["localhost:8002"]
729+
}
730+
}
731+
with launch_disaggregated_llm(disaggregated_server_config,
732+
ctx_server_config, gen_server_config,
733+
self.MODEL_PATH) as llm:
734+
task = MMLU(self.MODEL_NAME)
735+
task.evaluate(llm)
736+
task = GSM8K(self.MODEL_NAME)
737+
task.evaluate(llm)
738+
719739
@pytest.mark.parametrize("overlap_scheduler", [False, True])
720740
def test_auto_dtype(self, overlap_scheduler):
721741
ctx_server_config = {

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -560,7 +560,7 @@ accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype
560560
accuracy/test_llm_api_pytorch.py::TestPhi4MM::test_auto_dtype_long_rope
561561
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
562562
accuracy/test_llm_api_pytorch.py::TestEXAONE4::test_auto_dtype
563-
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
563+
accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
564564
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
565565

566566
test_e2e.py::test_llama_e2e[use_cpp_session-remove_input_padding-]

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -110,7 +110,7 @@ accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_fp8_block_scales[latency]
110110
accuracy/test_llm_api_pytorch.py::TestPhi4MiniInstruct::test_auto_dtype
111111
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[fp8-latency]
112112
accuracy/test_llm_api_pytorch.py::TestQwen3_8B::test_w4a8_mxfp4[mxfp8-latency]
113-
accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
113+
accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
114114
accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
115115
disaggregated/test_disaggregated.py::test_disaggregated_cache_aware_balance[TinyLlama-1.1B-Chat-v1.0]
116116
disaggregated/test_disaggregated.py::test_disaggregated_cuda_graph[TinyLlama-1.1B-Chat-v1.0]

tests/integration/test_lists/test-db/l0_dgx_b200.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -74,5 +74,5 @@ l0_dgx_b200:
7474
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
7575
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
7676
- disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8]
77-
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
77+
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
7878
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -52,7 +52,7 @@ l0_dgx_h100:
5252
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[GSM8K-gen_tp=2-ctx_pp=2]
5353
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=1-ctx_pp=2]
5454
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_ctx_pp_gen_tp_asymmetric[MMLU-gen_tp=2-ctx_pp=2]
55-
- accuracy/test_disaggregated_serving.py::TestLlama3_1_8BInstruct::test_nixl_backend
55+
- accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend
5656
- accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend
5757
- test_e2e.py::test_ptp_quickstart_advanced_bs1
5858
- test_e2e.py::test_ptp_quickstart_advanced_deepseek_v3_lite_4gpus_adp_balance[DeepSeek-V3-Lite-FP8-DeepSeek-V3-Lite/fp8]

0 commit comments

Comments
 (0)