
[None][fix] update skip config #6891


Merged 2 commits on Aug 18, 2025
Changes from all commits
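The diff below adds and moves skip_less_device / skip_less_device_memory markers. These are project-specific pytest markers; purely as a hypothetical illustration (not this repository's actual hook), such markers are commonly enforced at collection time in a conftest.py:

# Hypothetical conftest.py sketch -- not this repository's actual implementation.
import pytest
import torch

def pytest_collection_modifyitems(config, items):
    device_count = torch.cuda.device_count()  # number of visible GPUs
    for item in items:
        marker = item.get_closest_marker("skip_less_device")
        if marker is not None and device_count < marker.args[0]:
            # Skip tests that declare a higher GPU requirement than is available.
            item.add_marker(
                pytest.mark.skip(
                    reason=f"needs {marker.args[0]} GPUs, found {device_count}"))

With a hook like this, @pytest.mark.skip_less_device(2) skips a test on single-GPU runners instead of failing it, which is the behavior the marker placement in this PR relies on.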
22 changes: 14 additions & 8 deletions tests/integration/defs/accuracy/test_disaggregated_serving.py
@@ -302,6 +302,7 @@ class TestLlama3_1_8BInstruct(LlmapiAccuracyTestHarness):
MODEL_PATH = f"{llm_models_root()}/llama-3.1-model/Llama-3.1-8B-Instruct"

@pytest.mark.skip_less_device_memory(32000)
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize("disable_overlap_scheduler", [False, True])
def test_auto_dtype(self, disable_overlap_scheduler):
ctx_server_config = {"disable_overlap_scheduler": True}
@@ -331,6 +332,8 @@ def test_auto_dtype(self, disable_overlap_scheduler):
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)

@pytest.mark.skip_less_device(2)
@skip_pre_hopper
def test_ngram(self):
speculative_decoding_config = {
"decoding_type": "NGram",
@@ -381,6 +384,7 @@ def test_ngram(self):
@skip_pre_hopper
@parametrize_with_ids("overlap_scheduler", [True, False])
@parametrize_with_ids("eagle3_one_model", [True, False])
@pytest.mark.skip_less_device(2)
def test_eagle3(self, overlap_scheduler, eagle3_one_model):
speculative_decoding_config = {
"decoding_type": "Eagle",
@@ -437,36 +441,33 @@ def test_eagle3(self, overlap_scheduler, eagle3_one_model):
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)

@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize("tp,pp", [(1, 2), (2, 1), (2, 2)],
ids=["tp1pp2", "tp2pp1", "tp2pp2"])
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
def test_tp_pp_symmetric(self, tp, pp, testset):
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, pp, tp, pp,
tp, 1, 1, get_accuracy_task(testset))

@pytest.mark.skip_less_device(4)
@parametrize_with_ids("ctx_pp", [2, 4])
@parametrize_with_ids("gen_tp", [1, 2])
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
def test_ctx_pp_gen_tp_asymmetric(self, ctx_pp, gen_tp, testset):
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, ctx_pp, 1, 1,
gen_tp, 1, 1, get_accuracy_task(testset))

@pytest.mark.skip_less_device(4)
@pytest.mark.parametrize("testset", ["GSM8K", "MMLU"])
def test_multi_instance(self, testset):
return run_parallel_test(self.MODEL_NAME, self.MODEL_PATH, 1, 1, 1, 1,
2, 2, get_accuracy_task(testset))


@pytest.mark.skip_less_device_memory(140000)
@pytest.mark.timeout(3600)
@pytest.mark.skip_less_device(4)
class TestLlama4ScoutInstruct(LlmapiAccuracyTestHarness):
MODEL_NAME = "meta-llama/Llama-4-Scout-17B-16E-Instruct"
MODEL_PATH = f"{llm_models_root()}/llama4-models/Llama-4-Scout-17B-16E-Instruct"

@pytest.mark.skip_less_device_memory(140000)
@pytest.mark.timeout(3600)
@pytest.mark.skip_less_device(8)
@pytest.mark.parametrize("overlap_scheduler", [False, True])
def test_auto_dtype(self, overlap_scheduler):
ctx_server_config = {"disable_overlap_scheduler": True}
@@ -505,6 +506,8 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness):
MODEL_NAME = "deepseek-ai/DeepSeek-V3-Lite"
MODEL_PATH = f"{llm_models_root()}/DeepSeek-V3-Lite/bf16"

@pytest.mark.skip_less_device(2)
@pytest.mark.skip_less_device_memory(60000)
def test_nixl_backend(self):
ctx_server_config = {
"disable_overlap_scheduler": True,
@@ -542,7 +545,7 @@ def test_nixl_backend(self):
@parametrize_with_ids("overlap_scheduler", [True, False])
@parametrize_with_ids("mtp_nextn",
[0, pytest.param(2, marks=skip_pre_hopper)])
@pytest.mark.skip_less_device(4)
@pytest.mark.skip_less_device(8)
def test_auto_dtype(self, overlap_scheduler, mtp_nextn):
ctx_server_config = {"disable_overlap_scheduler": True}
gen_server_config = {"disable_overlap_scheduler": not overlap_scheduler}
@@ -586,6 +589,7 @@ class TestGemma3_1BInstruct(LlmapiAccuracyTestHarness):
MODEL_NAME = "google/gemma-3-1b-it"
MODEL_PATH = f"{llm_models_root()}/gemma/gemma-3-1b-it/"

@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize("overlap_scheduler", [False, True])
def test_auto_dtype(self, overlap_scheduler):
ctx_server_config = {
@@ -637,6 +641,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness):
MODEL_NAME = "Qwen3/Qwen3-8B"
MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8"

@pytest.mark.skip_less_device(2)
def test_nixl_backend(self):
ctx_server_config = {
"disable_overlap_scheduler": True,
@@ -673,8 +678,9 @@ def test_nixl_backend(self):
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)

@pytest.mark.parametrize("overlap_scheduler", [False, True])
@skip_pre_hopper
@pytest.mark.skip_less_device(2)
@pytest.mark.parametrize("overlap_scheduler", [False, True])
def test_auto_dtype(self, overlap_scheduler):
ctx_server_config = {
"disable_overlap_scheduler": True,
31 changes: 16 additions & 15 deletions tests/integration/defs/accuracy/test_llm_api_pytorch.py
@@ -695,25 +695,26 @@ def test_auto_dtype(self):

class TestMistralSmall24B(LlmapiAccuracyTestHarness):
MODEL_NAME = "mistralai/Mistral-Small-3.1-24B-Instruct-2503"
MODEL_PATH = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503"
Collaborator:

Out of curiosity, why were these changes necessary? I understand they're achieving the same thing (well, except that the previous version was also checking quant_algo == None for test_auto_dtype).

@pytest.mark.skip_less_device_memory(80000)
@pytest.mark.parametrize(
"model_path, expected_quant_algo",
[
# Original bfloat16 model.
(f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503", None),
# FP8 model.
pytest.param(
f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8",
QuantAlgo.FP8,
marks=skip_pre_ada,
),
],
)
def test_auto_dtype(self, model_path, expected_quant_algo):
def test_auto_dtype(self):
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
task = CnnDailymail(self.MODEL_NAME)
task.evaluate(llm)
task = MMLU(self.MODEL_NAME)
task.evaluate(llm)
task = GSM8K(self.MODEL_NAME)
task.evaluate(llm)

@skip_pre_ada
@pytest.mark.skip_less_device_memory(80000)
def test_fp8(self):
kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
model_path = f"{llm_models_root()}/Mistral-Small-3.1-24B-Instruct-2503-fp8"
with LLM(model_path, kv_cache_config=kv_cache_config) as llm:
assert llm.args.quant_config.quant_algo == expected_quant_algo
assert llm.args.quant_config.quant_algo == QuantAlgo.FP8
task = CnnDailymail(self.MODEL_NAME)
task.evaluate(llm)
task = MMLU(self.MODEL_NAME)
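
On the collaborator's question above: the split into test_auto_dtype and test_fp8 drops the explicit quant_algo assertion for the bf16 model. A minimal sketch (using only the API already shown in this diff) of how test_auto_dtype could retain that check:

@pytest.mark.skip_less_device_memory(80000)
def test_auto_dtype(self):
    kv_cache_config = KvCacheConfig(free_gpu_memory_fraction=0.75)
    with LLM(self.MODEL_PATH, kv_cache_config=kv_cache_config) as llm:
        # Restores the check the removed parametrized version made for bf16.
        assert llm.args.quant_config.quant_algo is None
        task = CnnDailymail(self.MODEL_NAME)
        task.evaluate(llm)
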
4 changes: 2 additions & 2 deletions tests/integration/test_lists/qa/llm_function_full.txt
@@ -454,8 +454,8 @@ accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[
accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_eagle3_tp8[eagle3_one_model=False]
accuracy/test_llm_api_pytorch.py::TestMistral7B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8-cuda_graph=False]
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep4-cuda_graph=True]
accuracy/test_llm_api_pytorch.py::TestLlama4MaverickInstruct::test_auto_dtype[tp8ep8-cuda_graph=True]
2 changes: 1 addition & 1 deletion tests/integration/test_lists/qa/llm_function_sanity.txt
@@ -46,7 +46,7 @@ accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=False]
accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestKanana_Instruct::test_auto_dtype
accuracy/test_llm_api_pytorch.py::TestKimiK2::test_fp8_blockscale[latency]
accuracy/test_llm_api_pytorch.py::TestLlama3_1_8B::test_nvfp4
4 changes: 2 additions & 2 deletions tests/integration/test_lists/test-db/l0_h100.yml
@@ -205,8 +205,8 @@ l0_h100:
- accuracy/test_llm_api_pytorch.py::TestGemma3_1BInstruct::test_fp8_prequantized
- accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized
- accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_auto_dtype
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-None]
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype[/scratch.trt_llm_data/llm-models/Mistral-Small-3.1-24B-Instruct-2503-fp8-FP8]
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_auto_dtype
- accuracy/test_llm_api_pytorch.py::TestMistralSmall24B::test_fp8
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=False]
- accuracy/test_llm_api_pytorch.py::TestQwen3_30B_A3B::test_fp8_block_scales[latency-torch_compile=True]
- accuracy/test_llm_api_pytorch.py::TestLlama3_1_8BInstruct::test_guided_decoding[llguidance]