Skip to content

Commit 8c29407

Browse files
committed
Test new Slurm script
Signed-off-by: Yanchao Lu <[email protected]>
1 parent 67125cc commit 8c29407

File tree

4 files changed

+118
-52
lines changed

4 files changed

+118
-52
lines changed

jenkins/L0_Test.groovy

Lines changed: 62 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,6 @@ import groovy.json.JsonOutput
77
import com.nvidia.bloom.KubernetesManager
88
import com.nvidia.bloom.Constants
99
import com.nvidia.bloom.CloudManager
10-
import com.nvidia.bloom.KubernetesManager
1110
import com.nvidia.bloom.SlurmConfig
1211
import com.nvidia.bloom.SlurmCluster
1312
import com.nvidia.bloom.SlurmPartition
@@ -211,6 +210,13 @@ def executeLLMTestOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
211210
sh "cp ${llmSrc}/cpp/build_backup/*.xml ${stageName} || true"
212211
sh "ls ${stageName}/ -all"
213212
})
213+
214+
// Clean up the workspace
215+
sh """
216+
env | sort
217+
pwd && ls -alh
218+
rm -rf ./*
219+
"""
214220
}
215221
}
216222

@@ -219,8 +225,11 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
219225
SlurmPartition partition = SlurmConfig.partitionConfig[platform] as SlurmPartition
220226
SlurmCluster cluster = SlurmConfig.clusterConfig[partition.clusterName]
221227

222-
def nodeName = "${cluster.host}-test-${UUID.randomUUID().toString()}"
223-
def nodeSecret = CloudManager.createNode(nodeName)
228+
// Create a unique suffix for the node name and workspace
229+
String customSuffix = "${env.BUILD_TAG}-${UUID.randomUUID().toString().replaceAll("-", "").substring(0, 6)}".toLowerCase()
230+
def nodeName = "${cluster.host}-test-${customSuffix}"
231+
def customWorkspace = "/tmp/${nodeName}"
232+
def nodeSecret = CloudManager.createNode(nodeName, customWorkspace)
224233

225234
try {
226235
// Run ssh command to start node in desired cluster via SLURM
@@ -263,12 +272,30 @@ def runLLMTestlistOnSlurm(pipeline, platform, testList, config=VANILLA_CONFIG, p
263272
}
264273

265274
if (CloudManager.isNodeOnline(nodeName)) {
266-
def dockerArgs = "--gpus ${gpuCount} --cap-add=SYS_ADMIN --ipc=host --security-opt seccomp=unconfined -u root:root -v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro -v /tmp/ccache:${CCACHE_DIR}:rw -v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw --cap-add syslog"
275+
node(nodeName) {
276+
sh """
277+
env | sort
278+
pwd && ls -alh
279+
ls -alh ${env.WORKSPACE}
280+
ls -alh ${env.WORKSPACE_TMP}
281+
"""
282+
}
283+
284+
def dockerArgs = "--gpus ${gpuCount} " +
285+
"--cap-add=SYS_ADMIN " +
286+
"--ipc=host " +
287+
"--security-opt seccomp=unconfined " +
288+
"-u root:root " +
289+
"-v /home/scratch.trt_llm_data:/scratch.trt_llm_data:ro " +
290+
"-v /tmp/ccache:${CCACHE_DIR}:rw " +
291+
"-v /tmp/pipcache/http-v2:/root/.cache/pip/http-v2:rw " +
292+
"--cap-add syslog"
267293

268294
if (partition.clusterName == "dlcluster") {
269295
dockerArgs += " -e NVIDIA_IMEX_CHANNELS=0"
270296
}
271-
slurmRunner = runInDockerOnNodeMultiStage(LLM_DOCKER_IMAGE, nodeName, dockerArgs, false)
297+
298+
slurmRunner = runInDockerOnNodeMultiStage(LLM_DOCKER_IMAGE, nodeName, dockerArgs, true)
272299
executeLLMTestOnSlurm(pipeline, platform, testList, config, perfMode, stageName, splitId, splits, skipInstallWheel, cpver, slurmRunner)
273300
} else {
274301
echo "The node does not come online in 2 hours, terminating the job"
@@ -560,6 +587,13 @@ def cacheErrorAndUploadResult(stageName, taskRunner, finallyRunner, noResultIfSu
560587
"${UPLOAD_PATH}/test-results/"
561588
)
562589
junit(testResults: "${stageName}/results*.xml")
590+
591+
// Clean up the workspace
592+
sh """
593+
env | sort
594+
pwd && ls -alh
595+
rm -rf ./*
596+
"""
563597
}
564598
}
565599
}
@@ -796,7 +830,7 @@ def echoNodeAndGpuInfo(pipeline, stageName)
796830

797831
def runLLMDocBuild(pipeline, config)
798832
{
799-
// Step 1: cloning tekit source code
833+
// Step 1: cloning source code
800834
sh "pwd && ls -alh"
801835
sh "env | sort"
802836
// Allow checking out from a forked repo; svc_tensorrt needs access to that repo, otherwise the clone will fail
@@ -1241,13 +1275,16 @@ def rerunFailedTests(stageName, llmSrc, testCmdLine) {
12411275

12421276
def runLLMTestlistOnPlatformImpl(pipeline, platform, testList, config=VANILLA_CONFIG, perfMode=false, stageName="Undefined", splitId=1, splits=1, skipInstallWheel=false, cpver="cp312")
12431277
{
1244-
// Step 1: create LLM_ROOT dir
1245-
sh "pwd && ls -alh"
1246-
// TODO: proper way to clean workspace, maybe save in a folder named with BUILD_ID.
1247-
// So that it can work with multiple job running in same node
1248-
sh "rm -rf ./*"
1278+
// Step 1: create LLM_ROOT dir and clean up the workspace
12491279
def llmRootConfig = "${LLM_ROOT}${config}"
1250-
sh "mkdir ${llmRootConfig}"
1280+
sh """
1281+
env | sort
1282+
pwd && ls -alh
1283+
rm -rf ./*
1284+
mkdir ${llmRootConfig}
1285+
ls -alh ${env.WORKSPACE}
1286+
ls -alh ${env.WORKSPACE_TMP}
1287+
"""
12511288

12521289
def llmPath = sh (script: "realpath ${llmRootConfig}", returnStdout: true).trim()
12531290
def llmSrc = "${llmPath}/TensorRT-LLM/src"
@@ -1562,6 +1599,13 @@ def runLLMTestlistOnPlatform(pipeline, platform, testList, config=VANILLA_CONFIG
15621599
sh "cp ${llmSrc}/cpp/build_backup/*.xml ${stageName} || true"
15631600
sh "ls ${stageName}/ -all"
15641601
})
1602+
1603+
// Clean up the workspace
1604+
sh """
1605+
env | sort
1606+
pwd && ls -alh
1607+
rm -rf ./*
1608+
"""
15651609
}
15661610

15671611

@@ -1890,12 +1934,8 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
18901934
fullSet += SBSATestConfigs.keySet()
18911935

18921936
SBSASlurmTestConfigs = [
1893-
"GB200-PyTorch-1": ["gb200-unrestricted", "l0_gb200", 1, 3],
1894-
"GB200-PyTorch-2": ["gb200-unrestricted", "l0_gb200", 2, 3],
1895-
"GB200-PyTorch-3": ["gb200-unrestricted", "l0_gb200", 3, 3],
1896-
"GB200-TensorRT-1": ["gb200-unrestricted", "l0_gb200", 1, 2],
1897-
"GB200-TensorRT-2": ["gb200-unrestricted", "l0_gb200", 2, 2],
1898-
"GB200-Triton-Post-Merge-1": ["gb200-unrestricted", "l0_gb200", 1, 1],
1937+
// Not used in the pipeline now
1938+
// "GB200-PyTorch-1": ["gb200-single", "l0_gb200", 1, 3],
18991939
"GB200-4_GPUs-PyTorch-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
19001940
"GB200-4_GPUs-PyTorch-Post-Merge-1": ["gb200-x4", "l0_gb200_multi_gpus", 1, 1, 4],
19011941
]
@@ -1909,7 +1949,6 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
19091949
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-4": ["gb200-multi-node", "l0_gb200_multi_nodes", 4, 7, 8, 2],
19101950
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-5": ["gb200-multi-node", "l0_gb200_multi_nodes", 5, 7, 8, 2],
19111951
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-6": ["gb200-multi-node", "l0_gb200_multi_nodes", 6, 7, 8, 2],
1912-
"GB200-8_GPUs-2_Nodes-PyTorch-Post-Merge-7": ["gb200-multi-node", "l0_gb200_multi_nodes", 7, 7, 8, 2],
19131952
]
19141953
fullSet += multiNodesSBSAConfigs.keySet()
19151954

@@ -2129,7 +2168,9 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
21292168
echo "###### Check pip install Start ######"
21302169
withEnv(libEnv) {
21312170
sh "env | sort"
2132-
checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
2171+
timeout(time: 1, unit: 'HOURS') {
2172+
checkPipInstall(pipeline, "${cpu_arch}/${wheelPath}")
2173+
}
21332174
}
21342175
echo "###### Run LLMAPI tests Start ######"
21352176
def config = VANILLA_CONFIG
@@ -2464,7 +2505,7 @@ pipeline {
24642505

24652506
def testPhase2StageName = env.testPhase2StageName
24662507
if (testPhase2StageName) {
2467-
def dgxSigns = ["DGX_H100", "DGX_H200", "GB200-4_GPUs", "GB200-8_GPUs", "DGX_B200", "RTXPro6000-4_GPUs"]
2508+
def dgxSigns = ["2_GPUs", "4_GPUs", "8_GPUs"]
24682509
singleGpuJobs = parallelJobs.findAll{!dgxSigns.any{sign -> it.key.contains(sign)}}
24692510
dgxJobs = parallelJobs.findAll{dgxSigns.any{sign -> it.key.contains(sign)}}
24702511
}

jenkins/scripts/slurm_run.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,7 @@ else
3434
done
3535
fi
3636
# Braces are required here: "$testList_" would expand a variable named
# "testList_" (underscore is a valid identifier character), not "testList".
testList="${testList}_${splitId}"
37-
export CPP_TEST_TIMEOUT_OVERRIDDEN=7200
37+
export CPP_TEST_TIMEOUT_OVERRIDDEN=$pytestTestTimeout
3838
export LLM_ROOT=$llmSrcNode
3939
export LLM_MODELS_ROOT=$MODEL_CACHE_DIR
4040
export UCX_TLS=^gdr_copy
@@ -43,6 +43,7 @@ testCmdLines=(
4343
"$llmSrcNode/tensorrt_llm/llmapi/trtllm-llmapi-launch"
4444
"pytest"
4545
"-v"
46+
# Bash array elements are separated by whitespace; a trailing comma would become part of the argument.
"--timeout-method=thread"
4647
"--timeout=$pytestTestTimeout"
4748
"--test-list=$testListPathNode"
4849
"--waives-file=$waivesListPathNode"

tests/integration/test_lists/test-db/l0_gb200.yml

Lines changed: 53 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Don't add any tests here.
2+
# Copied from l0_b200.yml; not currently used in the pipeline.
13
version: 0.0.1
24
l0_gb200:
35
- condition:
@@ -21,22 +23,10 @@ l0_gb200:
2123
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=0-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
2224
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=False]
2325
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16[mtp_nextn=2-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False-enable_chunked_prefill=True]
24-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
25-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
26-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
2726
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=True]
28-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
29-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
30-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
31-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]
3227
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
33-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
3428
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
35-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
36-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
37-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
3829
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
39-
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
4030
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
4131
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=none-mtp_nextn=2-fp8kv=False-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
4232
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_no_kv_cache_reuse[quant_dtype=nvfp4-mtp_nextn=0-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True]
@@ -67,15 +57,21 @@ l0_gb200:
6757
- test_e2e.py::test_ptp_quickstart_advanced_eagle3[Llama-3.1-8b-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct-EAGLE3-LLaMA3.1-Instruct-8B]
6858
- test_e2e.py::test_ptp_quickstart_advanced_ngram[Llama-3.1-8B-Instruct-llama-3.1-model/Llama-3.1-8B-Instruct]
6959
- test_e2e.py::test_trtllm_bench_pytorch_backend_sanity[meta-llama/Llama-3.1-8B-llama-3.1-8b-False-False]
70-
- unittest/_torch -k "not (modeling or multi_gpu or auto_deploy)" TIMEOUT (120)
71-
- unittest/_torch -k "modeling_llama"
60+
- unittest/_torch/attention
61+
- unittest/_torch/compilation
62+
- unittest/_torch/debugger
63+
- unittest/_torch/executor
64+
- unittest/_torch/misc
65+
- unittest/_torch/modules
66+
- unittest/_torch/multimodal
67+
- unittest/_torch/sampler
68+
- unittest/_torch/speculative
69+
- unittest/_torch/thop
70+
- unittest/_torch/modeling -k "modeling_llama"
7271
- unittest/_torch/modeling -k "modeling_mixtral"
7372
- unittest/_torch/modeling -k "modeling_deepseek"
7473
- unittest/_torch/modeling -k "modeling_gpt_oss"
7574
- unittest/_torch/auto_deploy/unit/singlegpu -k "not test_trtllm_bench_backend_comparison"
76-
- unittest/_torch/speculative/test_eagle3.py
77-
- unittest/_torch/speculative/test_kv_cache_reuse.py
78-
- unittest/_torch/speculative/test_dynamic_spec_decode.py
7975
- condition:
8076
ranges:
8177
system_gpu_count:
@@ -87,7 +83,7 @@ l0_gb200:
8783
linux_distribution_name: ubuntu*
8884
cpu: aarch64
8985
terms:
90-
stage: pre_merge
86+
stage: post_merge
9187
backend: tensorrt
9288
tests:
9389
# ------------- TRT tests ---------------
@@ -103,20 +99,47 @@ l0_gb200:
10399
- unittest/llmapi/test_llm_quant.py
104100
- unittest/trt/functional/test_fp4_gemm.py
105101
- condition:
106-
ranges:
107-
system_gpu_count:
108-
gte: 1
109-
lte: 1
110-
wildcards:
111-
gpu:
112-
- '*gb200*'
113-
linux_distribution_name: ubuntu*
114-
cpu: aarch64
115-
terms:
116-
stage: post_merge
117-
backend: triton
102+
ranges:
103+
system_gpu_count:
104+
gte: 1
105+
lte: 1
106+
wildcards:
107+
gpu:
108+
- '*gb200*'
109+
linux_distribution_name: ubuntu*
110+
cpu: aarch64
111+
terms:
112+
stage: post_merge
113+
backend: triton
118114
tests:
119115
# ------------- Triton tests ---------------
120116
- triton_server/test_triton.py::test_llava[llava]
121117
- triton_server/test_triton.py::test_gpt_ib_ptuning[gpt-ib-ptuning]
122118
- triton_server/test_triton.py::test_gpt_2b_ib_lora[gpt-2b-ib-lora]
119+
- condition:
120+
ranges:
121+
system_gpu_count:
122+
gte: 1
123+
lte: 1
124+
wildcards:
125+
gpu:
126+
- '*gb200*'
127+
linux_distribution_name: ubuntu*
128+
cpu: aarch64
129+
terms:
130+
stage: post_merge
131+
backend: pytorch
132+
tests:
133+
# ------------- PyTorch tests ---------------
134+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
135+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
136+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
137+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
138+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=True-attention_dp=False-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
139+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=False]
140+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=True-cuda_graph=False-overlap_scheduler=False-torch_compile=True]
141+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=False-overlap_scheduler=True-torch_compile=False]
142+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
143+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=0-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
144+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=CUTLASS-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]
145+
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4[moe_backend=TRTLLM-mtp_nextn=2-fp8kv=False-attention_dp=False-cuda_graph=True-overlap_scheduler=False-torch_compile=False]

tests/integration/test_lists/test-db/l0_gb200_multi_gpus.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -66,3 +66,4 @@ l0_gb200_multi_gpus:
6666
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus[moe_backend=TRTLLM-mtp_nextn=2-ep4-fp8kv=True-attention_dp=True-cuda_graph=True-overlap_scheduler=True-torch_compile=False]
6767
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_nvfp4_4gpus_online_eplb[fp8kv=True]
6868
- accuracy/test_llm_api_pytorch.py::TestDeepSeekV3Lite::test_bfloat16_4gpus_online_eplb[mtp_nextn=2]
69+
- accuracy/test_llm_api_pytorch.py::TestQwen3_235B_A22B::test_nvfp4_4gpus[latency_moe_trtllm_eagle3] TIMEOUT (90)

0 commit comments

Comments
 (0)