Commit 62459d5

[None][chore] Update pre-merge test to add DeepSeek/LLaMA and gpt-oss (#7192)
Signed-off-by: Pengbo Wang <[email protected]>
Signed-off-by: Pengbo Wang @ NVIDIA <[email protected]>
Co-authored-by: Tao Li @ NVIDIA <[email protected]>
1 parent 37a1bd8 commit 62459d5
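
In short: this commit promotes DeepSeek-R1 (including new MTP speculative-decoding variants), Llama-3.3-70B-Instruct, and gpt-oss accuracy tests into pre-merge CI. It adds an 8-GPU DGX B200 pre-merge stage and four GB200 two-node pre-merge stages, rebalances the GB200 post-merge shards from six to five, trims the GH200 TensorRT post-merge stages from two shards to one, records MTP reference accuracies for GSM8K and MMLU, and waives the newly promoted GB200 throughput tests against known bugs.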


8 files changed (+89, -15 lines)


jenkins/L0_Test.groovy

Lines changed: 12 additions & 10 deletions
@@ -1991,8 +1991,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     x86SlurmTestConfigs = [
         "DGX_B200-4_GPUs-PyTorch-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4],
         "DGX_B200-4_GPUs-PyTorch-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4],
-        "DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-x4", "l0_dgx_b200", 1, 2, 4],
-        "DGX_B200-4_GPUs-PyTorch-Post-Merge-2": ["b200-x4", "l0_dgx_b200", 2, 2, 4],
+        "DGX_B200-8_GPUs-PyTorch-1": ["b200-x8", "l0_dgx_b200", 1, 1, 8],
+        "DGX_B200-4_GPUs-PyTorch-Post-Merge-1": ["b200-x4", "l0_dgx_b200", 1, 1, 4],
     ]
     fullSet += x86SlurmTestConfigs.keySet()

@@ -2012,8 +2012,7 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
     // Try to match what are being tested on x86 H100_PCIe.
     // The total machine time is scaled proportionally according to the number of each GPU.
     SBSATestConfigs = [
-        "GH200-TensorRT-Post-Merge-1": ["gh200", "l0_gh200", 1, 2],
-        "GH200-TensorRT-Post-Merge-2": ["gh200", "l0_gh200", 2, 2],
+        "GH200-TensorRT-Post-Merge-1": ["gh200", "l0_gh200", 1, 1],
     ]
     fullSet += SBSATestConfigs.keySet()

@@ -2026,12 +2025,15 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)

     multiNodesSBSAConfigs = [
         // Each stage test 1 testcase with 8 GPUs and 2 nodes.
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-multi-node", "l0_gb200_multi_nodes", 2, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-multi-node", "l0_gb200_multi_nodes", 3, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-4": ["gb200-multi-node", "l0_gb200_multi_nodes", 4, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-5": ["gb200-multi-node", "l0_gb200_multi_nodes", 5, 7, 8, 2],
-        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-6": ["gb200-multi-node", "l0_gb200_multi_nodes", 6, 7, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 4, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-2": ["gb200-multi-node", "l0_gb200_multi_nodes", 2, 4, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-3": ["gb200-multi-node", "l0_gb200_multi_nodes", 3, 4, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-4": ["gb200-multi-node", "l0_gb200_multi_nodes", 4, 4, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-1": ["gb200-multi-node", "l0_gb200_multi_nodes", 1, 5, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-2": ["gb200-multi-node", "l0_gb200_multi_nodes", 2, 5, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-3": ["gb200-multi-node", "l0_gb200_multi_nodes", 3, 5, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-4": ["gb200-multi-node", "l0_gb200_multi_nodes", 4, 5, 8, 2],
+        "GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-5": ["gb200-multi-node", "l0_gb200_multi_nodes", 5, 5, 8, 2],
     ]
     fullSet += multiNodesSBSAConfigs.keySet()
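
The stage tuples above are positional. Below is a minimal sketch (not part of the commit) of how they appear to be laid out, judging from the values; the field names are guesses for illustration, not the pipeline's actual variables:

from dataclasses import dataclass
from typing import Optional

@dataclass
class StageConfig:
    platform: str                      # Slurm queue/label, e.g. "b200-x4"
    test_db_list: str                  # test-db list name, e.g. "l0_dgx_b200"
    split_id: int                      # 1-based shard index into that list
    split_count: int                   # total shards for the list
    gpu_count: int                     # GPUs requested per stage
    node_count: Optional[int] = None   # present only in multi-node configs

# The four new pre-merge multi-node shards: 4 shards, 8 GPUs across 2 nodes.
configs = {
    f"GB200-8_GPUs-2_Nodes-PyTorch-{i}": StageConfig(
        "gb200-multi-node", "l0_gb200_multi_nodes", i, 4, 8, 2)
    for i in range(1, 5)
}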

tests/integration/defs/accuracy/references/gsm8k.yaml

Lines changed: 8 additions & 0 deletions
@@ -72,6 +72,10 @@ deepseek-ai/DeepSeek-R1:
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
     accuracy: 95.42
+  - quant_algo: NVFP4
+    kv_cache_quant_algo: FP8
+    spec_dec_algo: MTP
+    accuracy: 95.42
   - quant_algo: FP8_BLOCK_SCALES
     accuracy: 95.413
   - quant_algo: FP8_BLOCK_SCALES
@@ -80,6 +84,10 @@ deepseek-ai/DeepSeek-R1:
   - quant_algo: FP8_BLOCK_SCALES
     kv_cache_quant_algo: FP8
     accuracy: 95.413
+  - quant_algo: FP8_BLOCK_SCALES
+    kv_cache_quant_algo: FP8
+    spec_dec_algo: MTP
+    accuracy: 95.413
 Qwen3/Qwen3-8B:
   - accuracy: 87.1114
   - quant_algo: FP8

tests/integration/defs/accuracy/references/mmlu.yaml

Lines changed: 8 additions & 0 deletions
@@ -166,6 +166,10 @@ deepseek-ai/DeepSeek-R1:
   - quant_algo: NVFP4
     kv_cache_quant_algo: FP8
     accuracy: 87.33
+  - quant_algo: NVFP4
+    kv_cache_quant_algo: FP8
+    spec_dec_algo: MTP
+    accuracy: 87.33
   - quant_algo: FP8_BLOCK_SCALES
     accuracy: 87.573
   - quant_algo: FP8_BLOCK_SCALES
@@ -174,6 +178,10 @@ deepseek-ai/DeepSeek-R1:
   - quant_algo: FP8_BLOCK_SCALES
     kv_cache_quant_algo: FP8
     accuracy: 87.573
+  - quant_algo: FP8_BLOCK_SCALES
+    kv_cache_quant_algo: FP8
+    spec_dec_algo: MTP
+    accuracy: 87.573
 Qwen3/Qwen3-8B:
   - quant_algo: W4A8_MXFP4_FP8
     accuracy: 72.70
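
Both reference files (gsm8k.yaml and mmlu.yaml) gain entries keyed by spec_dec_algo: MTP, with accuracies identical to their non-MTP counterparts, so MTP runs are held to the same bar. A hedged sketch of how a harness might select an entry from these lists; the real lookup lives in the accuracy defs and may differ:

import yaml

def find_reference(entries, quant_algo=None, kv_cache_quant_algo=None,
                   spec_dec_algo=None):
    """Return the accuracy whose optional keys all match the request."""
    for entry in entries:
        if (entry.get("quant_algo") == quant_algo
                and entry.get("kv_cache_quant_algo") == kv_cache_quant_algo
                and entry.get("spec_dec_algo") == spec_dec_algo):
            return entry["accuracy"]
    raise KeyError("no matching reference entry")

with open("tests/integration/defs/accuracy/references/gsm8k.yaml") as f:
    refs = yaml.safe_load(f)
ref = find_reference(refs["deepseek-ai/DeepSeek-R1"], quant_algo="NVFP4",
                     kv_cache_quant_algo="FP8", spec_dec_algo="MTP")  # 95.42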

tests/integration/defs/accuracy/test_llm_api_pytorch.py

Lines changed: 15 additions & 3 deletions
@@ -1775,10 +1775,21 @@ class TestDeepSeekR1(LlmapiAccuracyTestHarness):
                      16,
                      "CUTLASS",
                      marks=pytest.mark.skip_less_mpi_world_size(4)),
+        pytest.param(8,
+                     1,
+                     8,
+                     1,
+                     True,
+                     True,
+                     True,
+                     True,
+                     32,
+                     "CUTLASS",
+                     marks=pytest.mark.skip_less_mpi_world_size(8)),
     ],
     ids=[
         "latency", "latency_trtllmgen", "throughput", "throughput_tp8",
-        "throughput_tp4"
+        "throughput_tp4", "throughput_mtp"
     ])
     def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                               attention_dp, cuda_graph, overlap_scheduler,

@@ -1822,8 +1833,9 @@ def test_nvfp4_multi_gpus(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
     @pytest.mark.parametrize(
         "tp_size,pp_size,ep_size,mtp_nextn,fp8kv,attention_dp,cuda_graph,overlap_scheduler,max_batch_size",
         [(8, 1, 4, 3, False, False, True, True, 1),
-         (8, 1, 8, 0, True, True, True, True, 24)],
-        ids=["latency", "throughput"])
+         (8, 1, 8, 0, True, True, True, True, 24),
+         (8, 1, 8, 1, True, True, True, True, 24)],
+        ids=["latency", "throughput", "throughput_mtp"])
     def test_fp8_blockscale(self, tp_size, pp_size, ep_size, mtp_nextn, fp8kv,
                             attention_dp, cuda_graph, overlap_scheduler,
                             max_batch_size):
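
Each pytest.param tuple pairs positionally with one entry in ids, so the new "throughput_mtp" id selects the new configuration (mtp_nextn=1, which the reference YAML ties to spec_dec_algo: MTP). A self-contained sketch of the pattern, with an abbreviated signature for illustration only:

import pytest

@pytest.mark.parametrize(
    "tp_size,ep_size,mtp_nextn,max_batch_size",
    [
        (8, 8, 0, 24),  # id "throughput": no speculative decoding
        (8, 8, 1, 24),  # id "throughput_mtp": one MTP draft step
    ],
    ids=["throughput", "throughput_mtp"])
def test_config_shape(tp_size, ep_size, mtp_nextn, max_batch_size):
    # A bracketed node id picks a single case, e.g.
    # pytest "test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp]"
    assert mtp_nextn in (0, 1)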

tests/integration/test_lists/test-db/l0_dgx_b200.yml

Lines changed: 21 additions & 1 deletion
@@ -49,6 +49,27 @@ l0_dgx_b200:
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[DeepSeek-V3-Lite-bf16]
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-3.1-8b-instruct-hf-fp8]
   - disaggregated/test_disaggregated.py::test_disaggregated_deepseek_v3_lite_fp8_nixl[DeepSeek-V3-Lite-fp8]
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
+- condition:
+    ranges:
+      system_gpu_count:
+        gte: 8
+        lte: 8
+    wildcards:
+      gpu:
+      - '*b200*'
+      linux_distribution_name: ubuntu*
+      cpu: x86_64
+    terms:
+      stage: pre_merge
+      backend: pytorch
+  tests:
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180)
 - condition:
     ranges:
       system_gpu_count:

@@ -97,7 +118,6 @@ l0_dgx_b200:
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-cutlass]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-triton]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[ep4-trtllm]
-  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
   - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-triton]
   - disaggregated/test_disaggregated.py::test_disaggregated_benchmark_on_diff_backends[llama-v3-8b-hf]
   # ------------- AutoDeploy tests ---------------
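
As the block above reads, a test-db entry's tests run only on machines and pipeline stages matching every range, wildcard, and term in its condition. A hedged sketch of those semantics; the real matcher lives in the test-db tooling and may differ:

from fnmatch import fnmatch

def block_matches(cond, env):
    rng = cond.get("ranges", {}).get("system_gpu_count", {})
    if not rng.get("gte", 0) <= env["system_gpu_count"] <= rng.get("lte", 10**9):
        return False
    wildcards = cond.get("wildcards", {})
    if not any(fnmatch(env["gpu"], pat) for pat in wildcards.get("gpu", ["*"])):
        return False
    return all(env.get(k) == v for k, v in cond.get("terms", {}).items())

env = {"system_gpu_count": 8, "gpu": "b200-x8", "stage": "pre_merge",
       "backend": "pytorch"}
cond = {"ranges": {"system_gpu_count": {"gte": 8, "lte": 8}},
        "wildcards": {"gpu": ["*b200*"]},
        "terms": {"stage": "pre_merge", "backend": "pytorch"}}
assert block_matches(cond, env)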

tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml

Lines changed: 4 additions & 0 deletions
@@ -35,6 +35,10 @@ l0_gb200_multi_gpus:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=0-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_fp8_tp4
+  - accuracy/test_llm_api_pytorch.py::TestLlama3_3_70BInstruct::test_nvfp4_tp4
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[tp4-trtllm]
+  - accuracy/test_llm_api_pytorch.py::TestGPTOSS::test_w4_4gpus[dp4-cutlass]
 - condition:
     ranges:
       system_gpu_count:

tests/integration/test_lists/test-db/l0_gb200_multi_nodes.yml

Lines changed: 17 additions & 1 deletion
@@ -1,5 +1,22 @@
 version: 0.0.1
 l0_gb200_multi_nodes:
+- condition:
+    ranges:
+      # 2 nodes with each node has 4 GPUs
+      system_gpu_count:
+        gte: 8
+        lte: 8
+    wildcards:
+      gpu:
+      - '*gb200*'
+    terms:
+      stage: pre_merge
+      backend: pytorch
+  tests:
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
+  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] TIMEOUT (180)
 - condition:
     ranges:
       # 2 nodes with each node has 4 GPUs

@@ -16,6 +33,5 @@ l0_gb200_multi_nodes:
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency] TIMEOUT (180)
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] TIMEOUT (180)
   - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[latency_trtllmgen] TIMEOUT (180)
-  - accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] TIMEOUT (180)
   - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_cutlass] TIMEOUT (90)
   - accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4[latency_moe_trtllm] TIMEOUT (90)
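
Note how the shard counts here line up with the Groovy change above: the four pre-merge tests map onto the four GB200-8_GPUs-2_Nodes-PyTorch-{1..4} stages (split count 4), and the five remaining post-merge entries onto the five Post-Merge stages (split count 5), consistent with the comment that each stage runs one test case on 8 GPUs across 2 nodes.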

tests/integration/test_lists/waives.txt

Lines changed: 4 additions & 0 deletions
@@ -311,6 +311,10 @@ examples/test_mixtral.py::test_llm_mixtral_moe_plugin_lora_4gpus[Mixtral-8x7B-v0
 accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_tp8] SKIP (https://nvbugs/5455140)
 unittest/_torch/multi_gpu_modeling/test_llama4.py::test_llama4[pp1-ep4-enable_adp-enable_graph-tp8-trtllm-scout] SKIP (https://nvbugs/5477730)
 test_e2e.py::test_openai_chat_example[trt] SKIP (https://nvbugs/5477444)
+full:GB200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput] SKIP (https://nvbugs/5455140,https://nvbugs/5445466)
+full:GB200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_nvfp4_multi_gpus[throughput_mtp] SKIP (https://nvbugs/5455140,https://nvbugs/5445466)
+full:GB200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput] SKIP (https://nvbugs/5455140,https://nvbugs/5445466)
+full:GB200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::test_fp8_blockscale[throughput_mtp] SKIP (https://nvbugs/5455140,https://nvbugs/5445466)
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:False-nb:1] SKIP (https://nvbugs/5448462)
 examples/test_multimodal.py::test_llm_multimodal_general[fuyu-8b-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5448462)
 examples/test_multimodal.py::test_llm_multimodal_general[llava-1.5-7b-hf-pp:1-tp:1-float16-bs:8-cpp_e2e:True-nb:1] SKIP (https://nvbugs/5448479)
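
The waive lines follow the pattern "<optional scope prefix>test_id SKIP (comma-separated bug URLs)"; the full:GB200/ prefix appears to scope the skip to full runs on GB200. A small parsing sketch, illustrative only and not the harness's actual implementation:

import re

WAIVE_RE = re.compile(r"^(?P<test>\S+) SKIP \((?P<bugs>[^)]+)\)$")

line = ("full:GB200/accuracy/test_llm_api_pytorch.py::TestDeepSeekR1::"
        "test_fp8_blockscale[throughput_mtp] SKIP "
        "(https://nvbugs/5455140,https://nvbugs/5445466)")
m = WAIVE_RE.match(line)
assert m is not None
assert m.group("bugs").split(",") == ["https://nvbugs/5455140",
                                      "https://nvbugs/5445466"]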
