Skip to content

Commit 055fdd9

Browse files
authored
[None][fix] update skip config (#6891)
Signed-off-by: Ivy Zhang <[email protected]>
1 parent 96bda14 commit 055fdd9

File tree

5 files changed, with 35 additions and 28 deletions.

5 files changed, with 35 additions and 28 deletions.

tests/integration/defs/accuracy/test_disaggregated_serving.py

Lines changed: 14 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -302,6 +302,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
302302
MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"
303303

304304
@pytest.mark.skip_less_device_memory(32000)
305+
@pytest.mark.skip_less_device(2)
305306
@pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
306307
def test_auto_dtype(self, disable_overlap_scheduler):
307308
ctx_server_config = {"disable_overlap_scheduler": True}
@@ -331,6 +332,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
331332
task = GSM8K(self.MODEL_NAME)
332333
task.evaluate(llm)
333334

335+
@pytest.mark.skip_less_device(2)
336+
@skip_pre_hopper
334337
def test_ngram(self):
335338
speculative_decoding_config = {
336339
"decoding_type": "NGram",
@@ -381,6 +384,7 @@ def test_ngram(self):
381384
@skip_pre_hopper
382385
@parametrize_with_ids("overlap_scheduler", [True, False])
383386
@parametrize_with_ids("eagle3_one_model", [True, False])
387+
@pytest.mark.skip_less_device(2)
384388
def test_eagle3(self, overlap_scheduler, eagle3_one_model):
385389
speculative_decoding_config = {
386390
"decoding_type": "Eagle",
@@ -437,36 +441,33 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
437441
task = GSM8K(self.MODEL_NAME)
438442
task.evaluate(llm)
439443

440-
@pytest.mark.skip_less_device(2)
441444
@pytest.mark.parametrize("tp,pp", [(1, 2), (2, 1), (2, 2)],
442445
ids=["tp1pp2", "tp2pp1", "tp2pp2"])
443446
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
444447
def test_tp_pp_symmetric(self, tp, pp, testset):
445448
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
446449
tp, 1, 1, get_accuracy_task(testset))
447450

448-
@pytest.mark.skip_less_device(4)
449451
@parametrize_with_ids("ctx_pp", [2, 4])
450452
@parametrize_with_ids("gen_tp", [1, 2])
451453
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
452454
def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
453455
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
454456
gen_tp, 1, 1, get_accuracy_task(testset))
455457

456-
@pytest.mark.skip_less_device(4)
457458
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
458459
def test_multi_instance(self, testset):
459460
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, 1, 1, 1, 1,
460461
2, 2, get_accuracy_task(testset))
461462

462463

463-
@pytest.mark.skip_less_device_memory(140000)
464-
@pytest.mark.timeout(3600)
465-
@pytest.mark.skip_less_device(4)
466464
class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
467465
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
468466
MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"
469467

468+
@pytest.mark.skip_less_device_memory(140000)
469+
@pytest.mark.timeout(3600)
470+
@pytest.mark.skip_less_device(8)
470471
@pytest.mark.parametrize("overlap_scheduler", [False, True])
471472
def test_auto_dtype(self, overlap_scheduler):
472473
ctx_server_config = {"disable_overlap_scheduler": True}
@@ -505,6 +506,8 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
505506
MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
506507
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"
507508

509+
@pytest.mark.skip_less_device(2)
510+
@pytest.mark.skip_less_device_memory(60000)
508511
def test_nixl_backend(self):
509512
ctx_server_config = {
510513
"disable_overlap_scheduler": True,
@@ -542,7 +545,7 @@ def test_nixl_backend(self):
542545
@parametrize_with_ids("overlap_scheduler", [True, False])
543546
@parametrize_with_ids("mtp_nextn",
544547
[0, pytest.param(2, marks=skip_pre_hopper)])
545-
@pytest.mark.skip_less_device(4)
548+
@pytest.mark.skip_less_device(8)
546549
def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
547550
ctx_server_config = {"disable_overlap_scheduler": True}
548551
gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
@@ -586,6 +589,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
586589
MODEL_NAME = "google/gemma-3-1b-it"
587590
MODEL_PATH = f"{llm_models_root()}/gemma/gemma-3-1b-it/"
588591

592+
@pytest.mark.skip_less_device(2)
589593
@pytest.mark.parametrize("overlap_scheduler", [False, True])
590594
def test_auto_dtype(self, overlap_scheduler):
591595
ctx_server_config = {
@@ -637,6 +641,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
637641
MODEL_NAME = "Qwen3/Qwen3-8B"
638642
MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"
639643

644+
@pytest.mark.skip_less_device(2)
640645
def test_nixl_backend(self):
641646
ctx_server_config = {
642647
"disable_overlap_scheduler": True,
@@ -673,8 +678,9 @@ def test_nixl_backend(self):
673678
task = GSM8K(self.MODEL_NAME)
674679
task.evaluate(llm)
675680

676-
@pytest.mark.parametrize("overlap_scheduler", [False, True])
677681
@skip_pre_hopper
682+
@pytest.mark.skip_less_device(2)
683+
@pytest.mark.parametrize("overlap_scheduler", [False, True])
678684
def test_auto_dtype(self, overlap_scheduler):
679685
ctx_server_config = {
680686
"disable_overlap_scheduler": True,

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 16 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -695,25 +695,26 @@ def test_auto_dtype(self):
695695

696696
class TestMistralSmall24B(LlmapiAccuracyTestHarness):
697697
MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
698+
MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"
698699

699700
@pytest.mark.skip_less_device_memory(80000)
700-
@pytest.mark.parametrize(
701-
"model_path, expected_quant_algo",
702-
[
703-
# Original bfloat16 model.
704-
(f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503", None),
705-
# FP8 model.
706-
pytest.param(
707-
f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8",
708-
QuantAlgo.FP8,
709-
marks=skip_pre_ada,
710-
),
711-
],
712-
)
713-
def test_auto_dtype(self, model_path, expected_quant_algo):
701+
def test_auto_dtype(self):
714702
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
703+
with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
704+
task = CnnDailymail(self.MODEL_NAME)
705+
task.evaluate(llm)
706+
task = MMLU(self.MODEL_NAME)
707+
task.evaluate(llm)
708+
task = GSM8K(self.MODEL_NAME)
709+
task.evaluate(llm)
710+
711+
@skip_pre_ada
712+
@pytest.mark.skip_less_device_memory(80000)
713+
def test_fp8(self):
714+
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
715+
model_path = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8"
715716
with LLM(model_path, kv_cache_config=kv_cache_config) as llm:
716-
assert llm.args.quant_config.quant_algo == expected_quant_algo
717+
assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
717718
task = CnnDailymail(self.MODEL_NAME)
718719
task.evaluate(llm)
719720
task = MMLU(self.MODEL_NAME)

tests/integration/test_lists/qa/llm_function_full.txt

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -454,8 +454,8 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
454454
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
455455
accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
456456
accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
457-
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
458-
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
457+
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
458+
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
459459
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
460460
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
461461
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]

tests/integration/test_lists/qa/llm_function_sanity.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -46,7 +46,7 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online
4646
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=False]
4747
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
4848
accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
49-
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
49+
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
5050
accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
5151
accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
5252
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4

tests/integration/test_lists/test-db/l0_h100.yml

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -205,8 +205,8 @@ l0_h100:
205205
- accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_fp8_prequantized
206206
- accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized
207207
- accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
208-
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
209-
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
208+
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
209+
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
210210
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
211211
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
212212
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]

0 commit comments

Comments
 (0)