|
1 |
| -@Library(['bloom-jenkins-shared-lib@main', 'trtllm-jenkins-shared-lib@main']) _ |
| 1 | +@Library(['bloom-jenkins-shared-lib@user/yiqingy/dgx_b300', 'trtllm-jenkins-shared-lib@main']) _ |
2 | 2 |
|
3 | 3 | import java.lang.InterruptedException
|
4 | 4 | import groovy.transform.Field
|
@@ -673,6 +673,10 @@ def createKubernetesPodConfig(image, type, arch = "amd64", gpuCount = 1, perfMod
|
673 | 673 | def driverVersion = Constants.DEFAULT_NVIDIA_DRIVER_VERSION
|
674 | 674 | def cpuCount = "${TESTER_CORES}"
|
675 | 675 |
|
| 676 | + // Multi-GPU only supports DGX-H100 and DGX-H200 due to the hardware stability. |
676 | 680 | if (hasMultipleGPUs)
|
677 | 681 | {
|
678 | 682 | // Not a hard requirement, but based on empirical values.
|
@@ -1873,9 +1877,21 @@ def launchTestJobs(pipeline, testFilter, dockerNode=null)
|
1873 | 1877 | "DGX_H200-4_GPUs-TensorRT-Post-Merge-1": ["dgx-h200-x4", "l0_dgx_h200", 1, 3, 4],
|
1874 | 1878 | "DGX_H200-4_GPUs-TensorRT-Post-Merge-2": ["dgx-h200-x4", "l0_dgx_h200", 2, 3, 4],
|
1875 | 1879 | "DGX_H200-4_GPUs-TensorRT-Post-Merge-3": ["dgx-h200-x4", "l0_dgx_h200", 3, 3, 4],
|
1876 | 1883 | "RTXPro6000-Pytorch-Post-Merge-1": ["rtx-pro-6000", "l0_rtx_pro_6000", 1, 1],
|
1877 | 1884 | "RTXPro6000-4_GPUs-Pytorch-Post-Merge-1": ["rtx-pro-6000-x4", "l0_rtx_pro_6000", 1, 2, 4],
|
1878 | 1885 | "RTXPro6000-4_GPUs-Pytorch-Post-Merge-2": ["rtx-pro-6000-x4", "l0_rtx_pro_6000", 2, 2, 4],
|
| 1886 | + "DGX_B300-4_GPUs-PyTorch-1": ["dgx-b300-x4", "l0_dgx_b300", 1, 1, 4], |
1879 | 1895 | ]
|
1880 | 1896 |
|
1881 | 1897 | parallelJobs = x86TestConfigs.collectEntries{key, values -> [key, [createKubernetesPodConfig(LLM_DOCKER_IMAGE, values[0], "amd64", values[4] ?: 1, key.contains("Perf")), {
|
@@ -2494,7 +2510,11 @@ pipeline {
|
2494 | 2510 |
|
2495 | 2511 | def testPhase2StageName = env.testPhase2StageName
|
2496 | 2512 | if (testPhase2StageName) {
|
2497 | 2514 | def dgxSigns = ["2_GPUs", "4_GPUs", "8_GPUs"]
2498 | 2518 | singleGpuJobs = parallelJobs.findAll{!dgxSigns.any{sign -> it.key.contains(sign)}}
|
2499 | 2519 | dgxJobs = parallelJobs.findAll{dgxSigns.any{sign -> it.key.contains(sign)}}
|
2500 | 2520 | }
|
|
0 commit comments