
Commit e307cbc

crazydemo authored and dominicshanshan committed
[None][fix] update skip config (NVIDIA#6891)
Signed-off-by: Ivy Zhang <[email protected]>
Signed-off-by: Wangshanshan <[email protected]>
1 parent 340b5bd · commit e307cbc

File tree: 5 files changed, +34 -27 lines


tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 13 additions & 7 deletions
@@ -345,6 +345,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"

     @pytest.mark.skip_less_device_memory(32000)
+    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
     def test_auto_dtype(self, disable_overlap_scheduler):
         ctx_server_config = {"disable_overlap_scheduler": True}
@@ -374,6 +375,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

+    @pytest.mark.skip_less_device(2)
+    @skip_pre_hopper
     def test_ngram(self):
         speculative_decoding_config = {
             "decoding_type": "NGram",
@@ -424,6 +427,7 @@ def test_ngram(self):
     @skip_pre_hopper
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("eagle3_one_model", [True, False])
+    @pytest.mark.skip_less_device(2)
     def test_eagle3(self, overlap_scheduler, eagle3_one_model):
         speculative_decoding_config = {
             "decoding_type": "Eagle",
@@ -578,7 +582,6 @@ def test_tp_pp_symmetric(self, tp, pp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
                                  tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @parametrize_with_ids("ctx_pp", [2, 4])
     @parametrize_with_ids("gen_tp", [1, 2])
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
@@ -589,20 +592,18 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
                                  gen_tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
     def test_multi_instance(self, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, 1, 1, 1, 1,
                                  2, 2, [get_accuracy_task(testset)])


-@pytest.mark.skip_less_device_memory(140000)
-@pytest.mark.timeout(3600)
-@pytest.mark.skip_less_device(4)
 class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

+    @pytest.mark.skip_less_device_memory(140000)
+    @pytest.mark.timeout(3600)
     @pytest.mark.skip_less_device(8)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
@@ -642,6 +643,8 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
     MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
     MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"

+    @pytest.mark.skip_less_device(2)
+    @pytest.mark.skip_less_device_memory(60000)
     def test_nixl_backend(self):
         ctx_server_config = {
             "disable_overlap_scheduler": True,
@@ -680,7 +683,7 @@ def test_nixl_backend(self):
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("mtp_nextn",
                           [0, pytest.param(2, marks=skip_pre_hopper)])
-    @pytest.mark.skip_less_device(4)
+    @pytest.mark.skip_less_device(8)
     def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
         ctx_server_config = {"disable_overlap_scheduler": True}
         gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
@@ -724,6 +727,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "google/gemma-3-1b-it"
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-3-1b-it/"

+    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         pytest.skip(
@@ -779,6 +783,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "Qwen3/Qwen3-8B"
     MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"

+    @pytest.mark.skip_less_device(2)
     def test_nixl_backend(self):
         ctx_server_config = {
             "disable_overlap_scheduler": True,
@@ -813,8 +818,9 @@ def test_nixl_backend(self):
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

-    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     @skip_pre_hopper
+    @pytest.mark.skip_less_device(2)
+    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         ctx_server_config = {
             "disable_overlap_scheduler": True,

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 15 deletions
@@ -779,25 +779,26 @@ def test_auto_dtype(self):

 class TestMistralSmall24B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+    MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"

     @pytest.mark.skip_less_device_memory(80000)
-    @pytest.mark.parametrize(
-        "model_path, expected_quant_algo",
-        [
-            # Original bfloat16 model.
-            (f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503", None),
-            # FP8 model.
-            pytest.param(
-                f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8",
-                QuantAlgo.FP8,
-                marks=skip_pre_ada,
-            ),
-        ],
-    )
-    def test_auto_dtype(self, model_path, expected_quant_algo):
+    def test_auto_dtype(self):
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_fp8(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        model_path = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8"
         with LLM(model_path, kv_cache_config=kv_cache_config) as llm:
-            assert llm.args.quant_config.quant_algo == expected_quant_algo
+            assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
             task = MMLU(self.MODEL_NAME)
tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 2 additions & 2 deletions
@@ -464,8 +464,8 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 1 deletion
@@ -47,7 +47,7 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=False]
 accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 2 additions & 2 deletions
@@ -220,8 +220,8 @@ l0_h100:
 - accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_fp8_prequantized
 - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized
 - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
-- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
 - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
 - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
