Commit 6a122cd

crazydemo authored and dominicshanshan committed

[None][fix] update skip config (NVIDIA#6891)

Signed-off-by: Ivy Zhang <[email protected]>
Signed-off-by: Wangshanshan <[email protected]>
1 parent 37c233c commit 6a122cd

7 files changed, +46 -41 lines changed
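
The substance of the commit is test gating: skip_less_device and skip_less_device_memory markers are added, moved, or retargeted so that each test declares its own GPU requirements, and a parametrized Mistral test is split in two. For orientation, here is a minimal sketch of how such custom skip markers are commonly enforced from a conftest.py hook; the helpers below are assumptions for illustration, not TensorRT-LLM's actual implementation.

# Illustrative sketch only: one common way to implement skip_less_device /
# skip_less_device_memory as custom pytest markers. The real conftest may
# differ; device_count() and device_memory_mb() are assumed helpers.
import pytest
import torch


def device_count() -> int:
    # Visible CUDA devices on the test host.
    return torch.cuda.device_count()


def device_memory_mb() -> int:
    # Total memory of device 0, in MiB.
    return torch.cuda.get_device_properties(0).total_memory // (1024 * 1024)


def pytest_collection_modifyitems(config, items):
    for item in items:
        m = item.get_closest_marker("skip_less_device")
        if m and device_count() < m.args[0]:
            item.add_marker(
                pytest.mark.skip(
                    reason=f"needs {m.args[0]} GPUs, found {device_count()}"))
        m = item.get_closest_marker("skip_less_device_memory")
        if m and device_memory_mb() < m.args[0]:
            item.add_marker(
                pytest.mark.skip(
                    reason=f"needs {m.args[0]} MiB of GPU memory"))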

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 10 additions & 7 deletions

@@ -345,6 +345,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
     MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"

     @pytest.mark.skip_less_device_memory(32000)
+    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
     def test_auto_dtype(self, disable_overlap_scheduler):
         ctx_server_config = {"disable_overlap_scheduler": True}
@@ -374,6 +375,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

+    @pytest.mark.skip_less_device(2)
+    @skip_pre_hopper
     def test_ngram(self):
         speculative_decoding_config = {
             "decoding_type": "NGram",
@@ -424,6 +427,7 @@ def test_ngram(self):
     @skip_pre_hopper
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("eagle3_one_model", [True, False])
+    @pytest.mark.skip_less_device(2)
     def test_eagle3(self, overlap_scheduler, eagle3_one_model):
         speculative_decoding_config = {
             "decoding_type": "Eagle",
@@ -578,7 +582,6 @@ def test_tp_pp_symmetric(self, tp, pp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
                                  tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @parametrize_with_ids("ctx_pp", [2, 4])
     @parametrize_with_ids("gen_tp", [1, 2])
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
@@ -589,20 +592,18 @@ def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
                                  gen_tp, 1, 1, [get_accuracy_task(testset)])

-    @pytest.mark.skip_less_device(4)
     @pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
     def test_multi_instance(self, testset):
         return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, 1, 1, 1, 1,
                                  2, 2, [get_accuracy_task(testset)])


-@pytest.mark.skip_less_device_memory(140000)
-@pytest.mark.timeout(3600)
-@pytest.mark.skip_less_device(4)
 class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
     MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

+    @pytest.mark.skip_less_device_memory(140000)
+    @pytest.mark.timeout(3600)
     @pytest.mark.skip_less_device(8)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
@@ -683,7 +684,7 @@ def test_nixl_backend(self):
     @parametrize_with_ids("overlap_scheduler", [True, False])
     @parametrize_with_ids("mtp_nextn",
                           [0, pytest.param(2, marks=skip_pre_hopper)])
-    @pytest.mark.skip_less_device(4)
+    @pytest.mark.skip_less_device(8)
     def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
         ctx_server_config = {"disable_overlap_scheduler": True}
         gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
@@ -727,6 +728,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
     MODEL_NAME = "google/gemma-3-1b-it"
     MODEL_PATH = f"{llm_models_root()}/gemma/gemma-3-1b-it/"

+    @pytest.mark.skip_less_device(2)
     @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         pytest.skip(
@@ -816,8 +818,9 @@ def test_nixl_backend(self):
         task = GSM8K(self.MODEL_NAME)
         task.evaluate(llm)

-    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     @skip_pre_hopper
+    @pytest.mark.skip_less_device(2)
+    @pytest.mark.parametrize("overlap_scheduler", [False, True])
     def test_auto_dtype(self, overlap_scheduler):
         ctx_server_config = {
             "disable_overlap_scheduler": True,

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 15 deletions

@@ -823,25 +823,26 @@ def test_auto_dtype(self):

 class TestMistralSmall24B(LlmapiAccuracyTestHarness):
     MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
+    MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"

     @pytest.mark.skip_less_device_memory(80000)
-    @pytest.mark.parametrize(
-        "model_path, expected_quant_algo",
-        [
-            # Original bfloat16 model.
-            (f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503", None),
-            # FP8 model.
-            pytest.param(
-                f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8",
-                QuantAlgo.FP8,
-                marks=skip_pre_ada,
-            ),
-        ],
-    )
-    def test_auto_dtype(self, model_path, expected_quant_algo):
+    def test_auto_dtype(self):
         kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
+            task = CnnDailymail(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = MMLU(self.MODEL_NAME)
+            task.evaluate(llm)
+            task = GSM8K(self.MODEL_NAME)
+            task.evaluate(llm)
+
+    @skip_pre_ada
+    @pytest.mark.skip_less_device_memory(80000)
+    def test_fp8(self):
+        kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
+        model_path = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8"
         with LLM(model_path, kv_cache_config=kv_cache_config) as llm:
-            assert llm.args.quant_config.quant_algo == expected_quant_algo
+            assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
             task = CnnDailymail(self.MODEL_NAME)
             task.evaluate(llm)
             task = MMLU(self.MODEL_NAME)

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 2 additions & 2 deletions

@@ -464,8 +464,8 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
 accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
 accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
 accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 0 deletions

@@ -58,6 +58,7 @@ accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
 accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
+accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
 accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
 accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4

tests/integration/test_lists/test-db/l0_dgx_b200.yml

Lines changed: 9 additions & 9 deletions

@@ -41,10 +41,10 @@ l0_dgx_b200:
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp4[tp4-cuda_graph=True]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_ucx[DeepSeek-V3-Lite-fp8]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-TRTLLM]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-TRITON]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-TRTLLM]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-triton]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16]
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
@@ -94,11 +94,11 @@ l0_dgx_b200:
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_nvfp4[dep4_latency_moe_cutlass-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama4ScoutInstruct::test_fp8[tp4-cuda_graph=True]
   - accuracy/test_disaggregated_serving.py::TestQwen3_30B_A3B::test_mixed_ctx_gen_model[ctxpp2gentp2]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-TRITON]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-TRTLLM]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-TRITON]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
   # ------------- AutoDeploy tests ---------------
   - accuracy/test_llm_api_autodeploy.py::TestLlama3_1_8B::test_auto_dtype
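
A plausible reading of the GPT-OSS renames above and below is that the test-db entries are being resynced with the node IDs pytest actually collects, whose backend segment is lowercase; an entry must byte-match a collected ID, so a stale test_w4_4gpus[tp4-CUTLASS] would select nothing. A hypothetical sketch of the parametrization shape (the real test's decorators may differ):

import pytest


# Assumed shape, for illustration only. With stacked parametrize decorators,
# the one nearest the function contributes the first segment of the ID.
@pytest.mark.parametrize("moe_backend", ["cutlass", "triton", "trtllm"])
@pytest.mark.parametrize("mapping", ["tp4", "ep4", "dp4"])
def test_w4_4gpus(mapping, moe_backend):
    pass

# Collected IDs: test_w4_4gpus[tp4-cutlass], test_w4_4gpus[tp4-triton], ...
# A stale entry such as test_w4_4gpus[tp4-CUTLASS] silently selects nothing.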

tests/integration/test_lists/test-db/l0_dgx_h100.yml

Lines changed: 6 additions & 6 deletions

@@ -151,12 +151,12 @@ l0_dgx_h100:
       backend: pytorch
       auto_trigger: gpt_oss
   tests:
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-TRITON]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-TRITON]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-CUTLASS]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-TRITON]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-triton]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4a16[dp4]
 - condition:
     ranges:

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 2 additions & 2 deletions

@@ -223,8 +223,8 @@ l0_h100:
   - accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_fp8_prequantized
   - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized
   - accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
-  - accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
-  - accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
+  - accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
+  - accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
   - accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]
