From 4ee29df3eb4292a515ca0c5c3c5360aa552dc3a0 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 2 Jul 2025 14:05:31 +0000 Subject: [PATCH 01/23] [Infra][TRTLLM-6224] - Upgrade dependencies to DLFW 25.06 and CUDA 12.9.1 Signed-off-by: Yiqing Yan --- README.md | 2 +- constraints.txt | 10 +--------- docker/Dockerfile.multi | 4 ++-- docker/Makefile | 6 +++--- docker/common/install_cuda_toolkit.sh | 2 +- docker/common/install_pytorch.sh | 2 +- docker/common/install_tensorrt.sh | 19 +++++++++---------- docs/source/reference/support-matrix.md | 2 +- jenkins/L0_Test.groovy | 2 +- requirements.txt | 2 +- 10 files changed, 21 insertions(+), 30 deletions(-) diff --git a/README.md b/README.md index 15449460963..16626968f61 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ TensorRT-LLM [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/) [![python](https://img.shields.io/badge/python-3.12-green)](https://www.python.org/downloads/release/python-3123/) [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/) -[![cuda](https://img.shields.io/badge/cuda-12.9.0-green)](https://developer.nvidia.com/cuda-downloads) +[![cuda](https://img.shields.io/badge/cuda-12.9.1-green)](https://developer.nvidia.com/cuda-downloads) [![trt](https://img.shields.io/badge/TRT-10.11.0-green)](https://developer.nvidia.com/tensorrt) [![version](https://img.shields.io/badge/release-1.0.0rc5-green)](./tensorrt_llm/version.py) [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE) diff --git a/constraints.txt b/constraints.txt index 160cf2a751f..e276ea19527 100644 --- a/constraints.txt +++ b/constraints.txt @@ -1,13 +1,5 @@ -# These vulnerabilities were inherited from the base image (pytorch:25.05-py3) and should be removed when the base image +# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed 
when the base image # is updated. -# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj -h11>=0.16.0 -# WAR against https://github.com/advisories/GHSA-7cx3-6m66-7c5m -tornado>=6.5.0 -# WAR against https://github.com/advisories/GHSA-5rjg-fvgr-3xxf -setuptools>=78.1.1 # WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7 protobuf>=4.25.8 -# WAR against https://github.com/advisories/GHSA-33p9-3p43-82vq -jupyter-core>=5.8.1 diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index 0d156c7a764..b5622677b50 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -1,8 +1,8 @@ # Multi-stage Dockerfile ARG BASE_IMAGE=nvcr.io/nvidia/pytorch ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver -ARG BASE_TAG=25.05-py3 -ARG TRITON_BASE_TAG=25.05-py3 +ARG BASE_TAG=25.06-py3 +ARG TRITON_BASE_TAG=25.06-py3 ARG DEVEL_IMAGE=devel FROM ${BASE_IMAGE}:${BASE_TAG} AS base diff --git a/docker/Makefile b/docker/Makefile index dde0e461c6f..8382d960884 100644 --- a/docker/Makefile +++ b/docker/Makefile @@ -186,16 +186,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . 
../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE) jenkins-rockylinux8_%: STAGE = tritondevel jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda -jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8 +jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8 rockylinux8_%: STAGE = tritondevel rockylinux8_%: BASE_IMAGE = nvidia/cuda -rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8 +rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8 # For x86_64 and aarch64 ubuntu22_%: STAGE = tritondevel ubuntu22_%: BASE_IMAGE = nvidia/cuda -ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04 +ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04 trtllm_%: STAGE = release trtllm_%: PUSH_TO_STAGING := 0 diff --git a/docker/common/install_cuda_toolkit.sh b/docker/common/install_cuda_toolkit.sh index 86794f18c1d..c2573158198 100644 --- a/docker/common/install_cuda_toolkit.sh +++ b/docker/common/install_cuda_toolkit.sh @@ -5,7 +5,7 @@ set -ex # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file. # CUDA version is usually aligned with the latest NGC CUDA image tag. # Only use when public CUDA image is not ready. 
-CUDA_VER="12.9.0_575.51.03" +CUDA_VER="12.9.1_575.57.08" CUDA_VER_SHORT="${CUDA_VER%_*}" NVCC_VERSION_OUTPUT=$(nvcc --version) diff --git a/docker/common/install_pytorch.sh b/docker/common/install_pytorch.sh index 52424b377b3..6dcba33039c 100644 --- a/docker/common/install_pytorch.sh +++ b/docker/common/install_pytorch.sh @@ -4,7 +4,7 @@ set -ex # Use latest stable version from https://pypi.org/project/torch/#history # and closest to the version specified in -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 TORCH_VERSION="2.7.1" SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"') diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh index bff9803691a..b8ad7ff642d 100644 --- a/docker/common/install_tensorrt.sh +++ b/docker/common/install_tensorrt.sh @@ -4,21 +4,20 @@ set -ex TRT_VER="10.11.0.33" # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 -CUDA_VER="12.9" # 12.9.0 +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 +CUDA_VER="12.9" # 12.9.1 # Keep the installation for cuDNN if users want to install PyTorch with source codes. # PyTorch 2.x can compile with cuDNN v9. -CUDNN_VER="9.10.1.4-1" -# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue. -# Use NCCL version 2.27.5 which has the fixes. +CUDNN_VER="9.10.2.21-1" +# PyTorch 25.06 uses NCCL 2.27.3. NCCL 2.27.5 resolves a perf regression issue. +# Use NCCL version 2.27.5 instead. NCCL_VER="2.27.5-1+cuda12.9" -# Use cuBLAS version 12.9.0.13 instead. 
-CUBLAS_VER="12.9.0.13-1" +CUBLAS_VER="12.9.1.4-1" # Align with the pre-installed CUDA / NVCC / NVRTC versions from # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html -NVRTC_VER="12.9.41-1" -CUDA_RUNTIME="12.9.37-1" -CUDA_DRIVER_VERSION="575.51.03-1.el8" +NVRTC_VER="12.9.86-1" +CUDA_RUNTIME="12.9.79-1" +CUDA_DRIVER_VERSION="575.57.08-1.el8" for i in "$@"; do case $i in diff --git a/docs/source/reference/support-matrix.md b/docs/source/reference/support-matrix.md index 0c59baf992b..9a886b26fc9 100644 --- a/docs/source/reference/support-matrix.md +++ b/docs/source/reference/support-matrix.md @@ -145,7 +145,7 @@ The following table shows the supported software for TensorRT-LLM. * - - Software Compatibility * - Container - - [25.05](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) + - [25.06](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html) * - TensorRT - [10.11](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html) * - Precision diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy index ea0ff373c6c..dc6e606b7c6 100644 --- a/jenkins/L0_Test.groovy +++ b/jenkins/L0_Test.groovy @@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312 // DLFW torch image -DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.05-py3" +DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3" //Ubuntu base image UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04" diff --git a/requirements.txt b/requirements.txt index 16c1e4b5f8c..0df00898dd5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -22,7 +22,7 @@ h5py==3.12.1 StrEnum sentencepiece>=0.1.99 tensorrt~=10.11.0 -# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 uses 2.8.0a0. +# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0. 
torch>=2.7.1,<=2.8.0a0 torchvision nvidia-modelopt[torch]~=0.33.0 From e3107356b98ec10b9a4ca25571524388c9324e66 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Thu, 3 Jul 2025 06:13:58 +0000 Subject: [PATCH 02/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index 0384904d67e..819bbd90afe 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507071100-5534" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 6e4863a11ed..f5c31fc16c9 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507031358-5678 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507031358-5678 From 3d0f31ba187387609824bdb7be98d442cda0fe59 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Tue, 8 Jul 2025 08:32:20 +0000 Subject: [PATCH 03/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index 819bbd90afe..edd67ae6cd0 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678" 
+DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507081620-5678" def createKubernetesPodConfig(image, arch = "amd64") { From cdefe82ece07a3a80ca044cabc9f24f49a4877a9 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 9 Jul 2025 07:55:54 +0000 Subject: [PATCH 04/23] remove WARs Signed-off-by: Yiqing Yan --- docker/Dockerfile.multi | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi index b5622677b50..c832481da9f 100644 --- a/docker/Dockerfile.multi +++ b/docker/Dockerfile.multi @@ -74,18 +74,10 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999" RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/ RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir -# WARs against security issues inherited from pytorch:25.04 -# * https://github.com/advisories/GHSA-vqfr-h8mv-ghfj -# * https://github.com/advisories/GHSA-7cx3-6m66-7c5m -# * https://github.com/advisories/GHSA-5rjg-fvgr-3xxf +# WARs against security issues inherited from pytorch:25.06 # * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7 -# * https://github.com/advisories/GHSA-33p9-3p43-82vq RUN pip3 install --upgrade --no-cache-dir \ - "h11>=0.16" \ - "tornado>=6.5.0" \ - "setuptools>=78.1.1,<80" \ - "protobuf>=4.25.8" \ - "jupyter-core>=5.8.1" + "protobuf>=4.25.8" FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton From 1c8d8775d70790754d57bd047ad98584e6768e42 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 9 Jul 2025 09:01:01 +0000 Subject: [PATCH 05/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index edd67ae6cd0..cbc6d88db90 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ 
import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507081620-5678" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507091644-5678" def createKubernetesPodConfig(image, arch = "amd64") { From c151e96d520f032d155ab35229e03beb3bf37857 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 9 Jul 2025 12:39:48 +0000 Subject: [PATCH 06/23] reinstall libibverbs-dev to fix libmlx5 issue Signed-off-by: Yiqing Yan --- docker/common/install_base.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/common/install_base.sh b/docker/common/install_base.sh index d1c2f036d63..d9a074fb62a 100644 --- a/docker/common/install_base.sh +++ b/docker/common/install_base.sh @@ -44,6 +44,8 @@ cleanup() { init_ubuntu() { apt-get update + # libibverbs-dev is installed but libmlx5.so is missing, reinstall the package + apt-get --reinstall install libibverbs-dev apt-get install -y --no-install-recommends \ ccache \ gdb \ @@ -53,7 +55,6 @@ init_ubuntu() { llvm \ libclang-rt-dev \ libffi-dev \ - libibverbs-dev \ libnuma1 \ libnuma-dev \ python3-dev \ From 3048ba64bc5f6ab686dc215f2b5f5a467be8c192 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 9 Jul 2025 13:39:07 +0000 Subject: [PATCH 07/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index cbc6d88db90..51abc017cfa 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507091644-5678" +DOCKER_IMAGE = 
"urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index f5c31fc16c9..51471b87ef4 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507031358-5678 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507031358-5678 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507031358-5678 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507092129-5678 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507092129-5678 From d26ce6ade817c1093ca32ba7975198258d81dd36 Mon Sep 17 
00:00:00 2001 From: Yiqing Yan Date: Tue, 15 Jul 2025 08:49:26 +0000 Subject: [PATCH 08/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index 51abc017cfa..f9f861ae6b2 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 51471b87ef4..477670229ca 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507092129-5678 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507092129-5678 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507092129-5678 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507151632-5678 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507151632-5678 From 68544d9489d7e2b69a82b5792207da1815386561 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 16 Jul 2025 09:08:59 +0000 Subject: [PATCH 09/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index f9f861ae6b2..e8d36f23bf0 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = 
"urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 477670229ca..48acc44c674 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. -LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507151632-5678 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507151632-5678 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507151632-5678 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507161655-5678 
+LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507161655-5678 From e3cdea5294b685fdea906197742da5c33d08e980 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Mon, 21 Jul 2025 02:41:44 +0000 Subject: [PATCH 10/23] Update images Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index e8d36f23bf0..518e9b5d8c9 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index 48acc44c674..ede119a447b 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507161655-5678 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507161655-5678 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507161655-5678 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507211028-5678 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507211028-5678 From a9482c0344ac2bebb40918b47be7d4b6a21f0ad1 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Mon, 21 Jul 2025 03:11:11 +0000 Subject: [PATCH 11/23] [nvbugs/5376229] - Fix Signed-off-by: Yiqing Yan --- tests/integration/defs/test_e2e.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/tests/integration/defs/test_e2e.py b/tests/integration/defs/test_e2e.py index 9d0ecc3d399..a69429c767d 100644 --- a/tests/integration/defs/test_e2e.py +++ b/tests/integration/defs/test_e2e.py @@ -1957,8 +1957,6 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path, modality, use_cuda_graph): # NOTE: individual tests need to be enabled in # 
tests/integration/test_lists/qa/examples_test_list.txt - llm_venv.run_cmd( - ['-m', 'pip', 'install', 'flash-attn==2.7.3', '--no-build-isolation']) example_root = Path(os.path.join(llm_root, "examples", "llm-api")) test_data_root = Path( From 725929884bb64bc69b9b53000627eec15c018fc0 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Thu, 24 Jul 2025 22:50:30 +0800 Subject: [PATCH 12/23] downgrade cuBLAS Signed-off-by: Yiqing Yan --- docker/common/install_tensorrt.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh index b8ad7ff642d..e1bb70dff46 100644 --- a/docker/common/install_tensorrt.sh +++ b/docker/common/install_tensorrt.sh @@ -12,7 +12,8 @@ CUDNN_VER="9.10.2.21-1" # PyTorch 25.06 uses NCCL 2.27.3. NCCL 2.27.5 resolves a perf regression issue. # Use NCCL version 2.27.5 instead. NCCL_VER="2.27.5-1+cuda12.9" -CUBLAS_VER="12.9.1.4-1" +# PyTorch 25.06 uses cuBLAS 12.9.1.4 but there are some issue in it. So don't upgrade cuBLAS version. 
+CUBLAS_VER="12.9.0.13-1" # Align with the pre-installed CUDA / NVCC / NVRTC versions from # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html NVRTC_VER="12.9.86-1" From 632f4311e77551d77af6c5557ae45bb0507fbcb6 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Fri, 25 Jul 2025 02:26:44 +0000 Subject: [PATCH 13/23] Update image for cuBLAS downgrade Signed-off-by: Yiqing Yan --- jenkins/controlCCache.groovy | 2 +- jenkins/current_image_tags.properties | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/jenkins/controlCCache.groovy b/jenkins/controlCCache.groovy index 518e9b5d8c9..82fa7757ad0 100644 --- a/jenkins/controlCCache.groovy +++ b/jenkins/controlCCache.groovy @@ -1,7 +1,7 @@ import java.lang.InterruptedException -DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678" +DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678" def createKubernetesPodConfig(image, arch = "amd64") { diff --git a/jenkins/current_image_tags.properties b/jenkins/current_image_tags.properties index ede119a447b..24078a941fb 100644 --- a/jenkins/current_image_tags.properties +++ b/jenkins/current_image_tags.properties @@ -11,7 +11,7 @@ # # NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that # images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead. 
-LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678 -LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507211028-5678 -LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507211028-5678 -LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507211028-5678 +LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678 +LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678 +LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507251001-5678 +LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507251001-5678 From 1ed330c414a854be86c9a2a09a6780f47550b1c7 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Mon, 28 Jul 2025 09:18:00 +0000 Subject: [PATCH 14/23] fix for review Signed-off-by: Yiqing Yan --- docker/common/install_base.sh | 2 +- docker/common/install_tensorrt.sh | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/docker/common/install_base.sh b/docker/common/install_base.sh index d9a074fb62a..bf6e11420a6 100644 --- a/docker/common/install_base.sh +++ b/docker/common/install_base.sh @@ -45,7 +45,7 @@ cleanup() { init_ubuntu() { apt-get update # libibverbs-dev is installed but libmlx5.so is missing, reinstall the package - apt-get --reinstall 
install libibverbs-dev + apt-get --reinstall install -y libibverbs-dev apt-get install -y --no-install-recommends \ ccache \ gdb \ diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh index e1bb70dff46..6d118b62c45 100644 --- a/docker/common/install_tensorrt.sh +++ b/docker/common/install_tensorrt.sh @@ -9,10 +9,11 @@ CUDA_VER="12.9" # 12.9.1 # Keep the installation for cuDNN if users want to install PyTorch with source codes. # PyTorch 2.x can compile with cuDNN v9. CUDNN_VER="9.10.2.21-1" -# PyTorch 25.06 uses NCCL 2.27.3. NCCL 2.27.5 resolves a perf regression issue. +# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue. # Use NCCL version 2.27.5 instead. NCCL_VER="2.27.5-1+cuda12.9" -# PyTorch 25.06 uses cuBLAS 12.9.1.4 but there are some issue in it. So don't upgrade cuBLAS version. +# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, which leads to failures with MoE Lora (see https://nvbugs/5376270). +# Continue using cuBLAS 12.9.0.13 until this issue is resolved. 
CUBLAS_VER="12.9.0.13-1" # Align with the pre-installed CUDA / NVCC / NVRTC versions from # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html From 5ae470ab26b62c527974b25d9e22a9e4dbf68a02 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Tue, 29 Jul 2025 07:57:56 +0000 Subject: [PATCH 15/23] fix for triton tag Signed-off-by: Yiqing Yan --- jenkins/Build.groovy | 4 ++- jenkins/scripts/get_triton_tag.sh | 36 +++++++++++++++++++ .../defs/triton_server/test_triton.py | 19 +++++++++- .../inflight_batcher_llm/scripts/build.sh | 5 ++- 4 files changed, 61 insertions(+), 3 deletions(-) create mode 100644 jenkins/scripts/get_triton_tag.sh diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index 5dae931b6ac..d689de393bc 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -446,7 +446,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64) // Build tritonserver artifacts def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim() // TODO: Remove after the cmake version is upgraded to 3.31.8 - sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install" + // Get triton tag from docker/dockerfile.multi + def tritonShortTag = sh(script: "${LLM_ROOT}/jenkins/scripts/get_triton_tag.sh ${LLM_ROOT}", returnStdout: true).trim() + sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. 
-DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install" // Step 3: packaging wheels into tarfile sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/" diff --git a/jenkins/scripts/get_triton_tag.sh b/jenkins/scripts/get_triton_tag.sh new file mode 100644 index 00000000000..b4b46ee93b5 --- /dev/null +++ b/jenkins/scripts/get_triton_tag.sh @@ -0,0 +1,36 @@ +#!/bin/bash + +# Script to get triton short tag from docker/Dockerfile.multi +# Usage: ./get_triton_tag.sh [llm_root_path] +# Output: triton short tag to stdout + +set -e + +# Default to current directory if no path provided +LLM_ROOT="${1:-.}" + +# Default triton tag +TRITON_SHORT_TAG="main" + +# Path to Dockerfile.multi +DOCKERFILE_MULTI_PATH="${LLM_ROOT}/docker/Dockerfile.multi" + +# Check if Dockerfile.multi exists +if [[ -f "$DOCKERFILE_MULTI_PATH" ]]; then + # Extract TRITON_BASE_TAG from Dockerfile.multi + TRITON_BASE_TAG_LINE=$(grep -E '^ARG TRITON_BASE_TAG=' "$DOCKERFILE_MULTI_PATH" | tail -n1) + + if [[ -n "$TRITON_BASE_TAG_LINE" ]]; then + TRITON_BASE_TAG=$(echo "$TRITON_BASE_TAG_LINE" | cut -d'=' -f2) + + if [[ -n "$TRITON_BASE_TAG" ]]; then + # Remove -py suffix and add r prefix + TRITON_SHORT_TAG="r${TRITON_BASE_TAG%-py}" + fi + fi +else + echo "Dockerfile.multi not found at $DOCKERFILE_MULTI_PATH" >&2 +fi + +# Output the triton short tag to stdout +echo "Using triton tag from Dockerfile.multi: $TRITON_SHORT_TAG" diff --git a/tests/integration/defs/triton_server/test_triton.py b/tests/integration/defs/triton_server/test_triton.py index 44b95dddf5f..dfc36eca6e9 100644 --- a/tests/integration/defs/triton_server/test_triton.py +++ b/tests/integration/defs/triton_server/test_triton.py @@ -506,9 +506,26 @@ def test_cpp_unit_tests(tritonserver_test_root, test_name, llm_root): "rm -rf build && " "mkdir -p 
build", llm_root) + # Get the value of TRITON_SHORT_TAG from docker/Dockerfile.multi + import subprocess + try: + triton_short_tag = subprocess.check_output( + [f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root], + text=True).strip() + print( + f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}" + ) + except (subprocess.CalledProcessError, FileNotFoundError): + triton_short_tag = "main" + print("using default triton tag: main") run_shell_command( f"cd {llm_root}/triton_backend/inflight_batcher_llm/build && " - f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ -DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 " + f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ " + f"-DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON " + f"-DTRITON_COMMON_REPO_TAG={triton_short_tag} " + f"-DTRITON_CORE_REPO_TAG={triton_short_tag} " + f"-DTRITON_THIRD_PARTY_REPO_TAG={triton_short_tag} " + f"-DTRITON_BACKEND_REPO_TAG={triton_short_tag} " "&& make -j8 install", llm_root) # Run the cpp unit tests diff --git a/triton_backend/inflight_batcher_llm/scripts/build.sh b/triton_backend/inflight_batcher_llm/scripts/build.sh index d077746bb51..44a5550021f 100644 --- a/triton_backend/inflight_batcher_llm/scripts/build.sh +++ b/triton_backend/inflight_batcher_llm/scripts/build.sh @@ -52,7 +52,10 @@ if [[ "$BUILD_UNIT_TESTS" == "true" ]]; then fi # TODO: Remove specifying Triton version after cmake version is upgraded to 3.31.8 -cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 .. +# Get TRITON_SHORT_TAG from docker/Dockerfile.multi +LLM_ROOT="$(dirname $0)/../../../.." 
+TRITON_SHORT_TAG=$("$LLM_ROOT/jenkins/scripts/get_triton_tag.sh" "$LLM_ROOT") +cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_CORE_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_BACKEND_REPO_TAG=${TRITON_SHORT_TAG} .. make install mkdir -p /opt/tritonserver/backends/tensorrtllm From 2bd936b1af82d79be2163820141a8f54fad5ef95 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Tue, 29 Jul 2025 09:39:08 +0000 Subject: [PATCH 16/23] fix Signed-off-by: Yiqing Yan --- jenkins/scripts/get_triton_tag.sh | 0 1 file changed, 0 insertions(+), 0 deletions(-) mode change 100644 => 100755 jenkins/scripts/get_triton_tag.sh diff --git a/jenkins/scripts/get_triton_tag.sh b/jenkins/scripts/get_triton_tag.sh old mode 100644 new mode 100755 From bc5a2a179de6abfcbb88e32fe6cfa32dbc02486b Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Tue, 29 Jul 2025 10:10:59 +0000 Subject: [PATCH 17/23] fix Signed-off-by: Yiqing Yan --- jenkins/scripts/get_triton_tag.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/jenkins/scripts/get_triton_tag.sh b/jenkins/scripts/get_triton_tag.sh index b4b46ee93b5..7f353e7cf39 100755 --- a/jenkins/scripts/get_triton_tag.sh +++ b/jenkins/scripts/get_triton_tag.sh @@ -24,8 +24,8 @@ if [[ -f "$DOCKERFILE_MULTI_PATH" ]]; then TRITON_BASE_TAG=$(echo "$TRITON_BASE_TAG_LINE" | cut -d'=' -f2) if [[ -n "$TRITON_BASE_TAG" ]]; then - # Remove -py suffix and add r prefix - TRITON_SHORT_TAG="r${TRITON_BASE_TAG%-py}" + # Remove -py3 suffix and add r prefix + TRITON_SHORT_TAG="r${TRITON_BASE_TAG%-py3}" fi fi else From b05ed900cfc8680092942ce66f48f6f418ba6a6f Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Tue, 29 Jul 2025 10:44:32 +0000 Subject: [PATCH 18/23] fix Signed-off-by: Yiqing Yan --- jenkins/scripts/get_triton_tag.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/jenkins/scripts/get_triton_tag.sh 
b/jenkins/scripts/get_triton_tag.sh index 7f353e7cf39..6a592609009 100755 --- a/jenkins/scripts/get_triton_tag.sh +++ b/jenkins/scripts/get_triton_tag.sh @@ -26,6 +26,7 @@ if [[ -f "$DOCKERFILE_MULTI_PATH" ]]; then if [[ -n "$TRITON_BASE_TAG" ]]; then # Remove -py3 suffix and add r prefix TRITON_SHORT_TAG="r${TRITON_BASE_TAG%-py3}" + echo "Using triton tag from Dockerfile.multi: $TRITON_SHORT_TAG" >&2 fi fi else @@ -33,4 +34,4 @@ else fi # Output the triton short tag to stdout -echo "Using triton tag from Dockerfile.multi: $TRITON_SHORT_TAG" +echo "$TRITON_SHORT_TAG" From f451678cc004aa8e2523ad743ea8a1ccbc957dd4 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Wed, 30 Jul 2025 06:36:53 +0000 Subject: [PATCH 19/23] change for build SBSA Signed-off-by: Yiqing Yan --- jenkins/Build.groovy | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index d689de393bc..4b665f8bb9c 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -109,9 +109,9 @@ def globalVars = [ // TODO: Move common variables to an unified location BUILD_CORES_REQUEST = "8" BUILD_CORES_LIMIT = "8" -BUILD_MEMORY_REQUEST = "48Gi" -BUILD_MEMORY_LIMIT = "64Gi" -BUILD_JOBS = "8" +BUILD_MEMORY_REQUEST = "96Gi" +BUILD_MEMORY_LIMIT = "96Gi" +BUILD_JOBS = "4" TESTER_CORES = "12" TESTER_MEMORY = "96Gi" From e4573c3aa7a8f4a47bf2c07469f189c764005d10 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Thu, 31 Jul 2025 09:39:35 +0000 Subject: [PATCH 20/23] fix triton version Signed-off-by: Yiqing Yan --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index 0df00898dd5..d8a0b268178 100644 --- a/requirements.txt +++ b/requirements.txt @@ -61,3 +61,4 @@ etcd3 blake3 llguidance==0.7.29 soundfile +triton==3.3.1 From e5035b406f11965117c31b84a0d27d1ec89469b5 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Fri, 1 Aug 2025 06:42:09 +0000 Subject: [PATCH 21/23] revert build memory and jobs change 
Signed-off-by: Yiqing Yan --- jenkins/Build.groovy | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/jenkins/Build.groovy b/jenkins/Build.groovy index 4b665f8bb9c..d689de393bc 100644 --- a/jenkins/Build.groovy +++ b/jenkins/Build.groovy @@ -109,9 +109,9 @@ def globalVars = [ // TODO: Move common variables to an unified location BUILD_CORES_REQUEST = "8" BUILD_CORES_LIMIT = "8" -BUILD_MEMORY_REQUEST = "96Gi" -BUILD_MEMORY_LIMIT = "96Gi" -BUILD_JOBS = "4" +BUILD_MEMORY_REQUEST = "48Gi" +BUILD_MEMORY_LIMIT = "64Gi" +BUILD_JOBS = "8" TESTER_CORES = "12" TESTER_MEMORY = "96Gi" From b8e96d63c214aa654377bd499dc3f6479afb3d38 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Sun, 3 Aug 2025 02:56:30 +0000 Subject: [PATCH 22/23] fix for review Signed-off-by: Yiqing Yan --- .../defs/triton_server/test_triton.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/tests/integration/defs/triton_server/test_triton.py b/tests/integration/defs/triton_server/test_triton.py index dfc36eca6e9..46df91af77f 100644 --- a/tests/integration/defs/triton_server/test_triton.py +++ b/tests/integration/defs/triton_server/test_triton.py @@ -508,16 +508,12 @@ def test_cpp_unit_tests(tritonserver_test_root, test_name, llm_root): # Get the value of TRITON_SHORT_TAG from docker/Dockerfile.multi import subprocess - try: - triton_short_tag = subprocess.check_output( - [f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root], - text=True).strip() - print( - f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}" - ) - except (subprocess.CalledProcessError, FileNotFoundError): - triton_short_tag = "main" - print("using default triton tag: main") + triton_short_tag = subprocess.check_output( + [f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root], + text=True).strip() + print( + f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}" + ) run_shell_command( f"cd {llm_root}/triton_backend/inflight_batcher_llm/build && " 
f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ " From 746565906d7a408970553581c37d35c09b897312 Mon Sep 17 00:00:00 2001 From: Yiqing Yan Date: Sun, 3 Aug 2025 03:04:23 +0000 Subject: [PATCH 23/23] fix for pre-commit Signed-off-by: Yiqing Yan --- tests/integration/defs/triton_server/test_triton.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/tests/integration/defs/triton_server/test_triton.py b/tests/integration/defs/triton_server/test_triton.py index 46df91af77f..4afab3868c5 100644 --- a/tests/integration/defs/triton_server/test_triton.py +++ b/tests/integration/defs/triton_server/test_triton.py @@ -511,9 +511,7 @@ def test_cpp_unit_tests(tritonserver_test_root, test_name, llm_root): triton_short_tag = subprocess.check_output( [f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root], text=True).strip() - print( - f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}" - ) + print(f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}") run_shell_command( f"cd {llm_root}/triton_backend/inflight_batcher_llm/build && " f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ "