From 155a23839622a8b227118f55065a2114c82c6f34 Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Fri, 15 Aug 2025 07:59:28 +0000 Subject: [PATCH 1/2] [https://nvbugs/5448437,https://nvbugs/5448449][fix] fix some nixl tests Signed-off-by: Bo Deng --- .../defs/accuracy/test_disaggregated_serving.py | 6 +++--- .../defs/disaggregated/test_disaggregated.py | 10 +++++++--- tests/integration/test_lists/test-db/l0_dgx_b200.yml | 3 --- tests/integration/test_lists/waives.txt | 3 +-- 4 files changed, 11 insertions(+), 11 deletions(-) diff --git a/tests/integration/defs/accuracy/test_disaggregated_serving.py b/tests/integration/defs/accuracy/test_disaggregated_serving.py index 9da5d279f98..0343c8bb054 100644 --- a/tests/integration/defs/accuracy/test_disaggregated_serving.py +++ b/tests/integration/defs/accuracy/test_disaggregated_serving.py @@ -21,7 +21,7 @@ from tensorrt_llm.llmapi.llm_args import LlmArgs from ..conftest import (get_device_count, llm_models_root, parametrize_with_ids, - skip_pre_hopper) + skip_no_hopper, skip_pre_hopper) from ..trt_test_alternative import popen from .accuracy_core import (GSM8K, MMLU, LlmapiAccuracyTestHarness, get_accuracy_task) @@ -508,6 +508,7 @@ class TestDeepSeekV3Lite(LlmapiAccuracyTestHarness): @pytest.mark.skip_less_device(2) @pytest.mark.skip_less_device_memory(60000) + @skip_no_hopper def test_nixl_backend(self): ctx_server_config = { "disable_overlap_scheduler": True, @@ -642,6 +643,7 @@ class TestQwen3_8B(LlmapiAccuracyTestHarness): MODEL_PATH = f"{llm_models_root()}/Qwen3/Qwen3-8B-FP8" @pytest.mark.skip_less_device(2) + @skip_no_hopper def test_nixl_backend(self): ctx_server_config = { "disable_overlap_scheduler": True, @@ -673,8 +675,6 @@ def test_nixl_backend(self): with launch_disaggregated_llm(disaggregated_server_config, ctx_server_config, gen_server_config, self.MODEL_PATH) as llm: - task = MMLU(self.MODEL_NAME) - task.evaluate(llm) task = GSM8K(self.MODEL_NAME) task.evaluate(llm) diff --git a/tests/integration/defs/disaggregated/test_disaggregated.py b/tests/integration/defs/disaggregated/test_disaggregated.py index a02d5a1a16c..9f20653a032 100644 --- a/tests/integration/defs/disaggregated/test_disaggregated.py +++ b/tests/integration/defs/disaggregated/test_disaggregated.py @@ -20,7 +20,8 @@ import pytest import yaml -from defs.conftest import llm_models_root, skip_arm, skip_no_hopper +from defs.conftest import (get_sm_version, llm_models_root, skip_arm, + skip_no_hopper) from defs.trt_test_alternative import check_call, check_output, popen from tensorrt_llm.logger import logger @@ -1212,7 +1213,7 @@ def get_config_for_benchmark(model_root, backend): "num_instances": 1, "max_batch_size": 2, "max_num_tokens": 384, - "max_seq_len": 320, + "max_seq_len": 384, "tensor_parallel_size": 1, "pipeline_parallel_size": 1, "disable_overlap_scheduler": True, @@ -1228,7 +1229,7 @@ def get_config_for_benchmark(model_root, backend): "pipeline_parallel_size": 1, "max_batch_size": 2, "max_num_tokens": 384, - "max_seq_len": 320, + "max_seq_len": 384, "cache_transceiver_config": { "backend": backend, "max_tokens_in_buffer": 512, @@ -1247,6 +1248,9 @@ def get_config_for_benchmark(model_root, backend): def test_disaggregated_benchmark_on_diff_backends( disaggregated_test_root, disaggregated_example_root, llm_venv, benchmark_model_root, benchmark_root, shared_gpt_path): + if "DeepSeek-V3-Lite" in benchmark_model_root and "fp8" in benchmark_model_root and get_sm_version( + ) != 90: + pytest.skip("The test should only run on Hopper") nixl_config = get_config_for_benchmark(benchmark_model_root, "nixl") ucx_config = get_config_for_benchmark(benchmark_model_root, "ucx") temp_dir = tempfile.TemporaryDirectory() diff --git a/tests/integration/test_lists/test-db/l0_dgx_b200.yml b/tests/integration/test_lists/test-db/l0_dgx_b200.yml index 520c979858f..0683346a687 100644 --- a/tests/integration/test_lists/test-db/l0_dgx_b200.yml +++ b/tests/integration/test_lists/test-db/l0_dgx_b200.yml @@ -72,6 +72,3 @@ l0_dgx_b200: - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16] - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf] - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8] - - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] - - accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend - - accuracy/test_disaggregated_serving.py::TestDeepSeekV3Lite::test_nixl_backend diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 8c87ead0af0..9f192bc15da 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -274,8 +274,7 @@ examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it] SK examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-27b-it] SKIP (https://nvbugs/5434451) examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-3-1b-it] SKIP (https://nvbugs/5434451) accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5437384) -accuracy/test_disaggregated_serving.py::TestQwen3_8B::test_nixl_backend SKIP (https://nvbugs/5448437) -disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-fp8] SKIP (https://nvbugs/5448449) +accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized SKIP (https://nvbugs/5445774) test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837) From 5caa636d980f98b4c213e6ba58b8a46c2a04c3d2 Mon Sep 17 00:00:00 2001 From: Bo Deng Date: Fri, 15 Aug 2025 08:25:16 +0000 Subject: [PATCH 2/2] fix list Signed-off-by: Bo Deng --- tests/integration/test_lists/waives.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/integration/test_lists/waives.txt b/tests/integration/test_lists/waives.txt index 9f192bc15da..0ac79f66797 100644 --- a/tests/integration/test_lists/waives.txt +++ b/tests/integration/test_lists/waives.txt @@ -274,7 +274,6 @@ examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-9b-it] SK examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-2-27b-it] SKIP (https://nvbugs/5434451) examples/test_gemma.py::test_hf_gemma_fp8_base_bf16_multi_lora[gemma-3-1b-it] SKIP (https://nvbugs/5434451) accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm_eagle3] SKIP (https://nvbugs/5437384) -accuracy/test_llm_api_pytorch.py::TestGemma3_27BInstruct::test_fp8_prequantized SKIP (https://nvbugs/5445774) test_e2e.py::test_ptp_quickstart_multimodal[llava-v1.6-mistral-7b-llava-v1.6-mistral-7b-hf-image-False] SKIP (https://nvbugs/5444095) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen1.5_7b_chat-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837) full:GB200/examples/test_qwen.py::test_llm_qwen_7b_multi_gpus_summary[qwen2_7b_instruct-enable_fmha_fp32_acc-enable_plugin-tp2pp2-nb:4] SKIP (https://nvbugs/5247837)