[Infra][TRTLLM-6224] - Upgrade dependencies to DLFW 25.06 and CUDA 12.9.1

yiqingy0 · yiqingy0 · commit 0c552007e083 · 2025-07-02T14:06:13.000Z
Signed-off-by: Yiqing Yan <yiqingy@nvidia.com>
diff --git a/README.md b/README.md
@@ -7,7 +7,7 @@ TensorRT-LLM
 [![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/)
 [![python](https://img.shields.io/badge/python-3.12-green)](https://www.python.org/downloads/release/python-3123/)
 [![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/)
-[![cuda](https://img.shields.io/badge/cuda-12.9.0-green)](https://developer.nvidia.com/cuda-downloads)
+[![cuda](https://img.shields.io/badge/cuda-12.9.1-green)](https://developer.nvidia.com/cuda-downloads)
 [![trt](https://img.shields.io/badge/TRT-10.11.0-green)](https://developer.nvidia.com/tensorrt)
 [![version](https://img.shields.io/badge/release-1.0.0rc1-green)](./tensorrt_llm/version.py)
 [![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)
diff --git a/constraints.txt b/constraints.txt
@@ -1,9 +0,0 @@
-# These vulnerabilities were inherited from the base image (pytorch:25.05-py3) and should be removed when the base image
-# is updated.
-
-# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
-h11>=0.16.0
-# WAR against https://github.com/advisories/GHSA-7cx3-6m66-7c5m
-tornado>=6.5.0
-# WAR against https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
-setuptools>=78.1.1
diff --git a/docker/Dockerfile.multi b/docker/Dockerfile.multi
@@ -1,8 +1,8 @@
 # Multi-stage Dockerfile
 ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
 ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
-ARG BASE_TAG=25.05-py3
-ARG TRITON_BASE_TAG=25.05-py3
+ARG BASE_TAG=25.06-py3
+ARG TRITON_BASE_TAG=25.06-py3
 ARG DEVEL_IMAGE=devel
 
 FROM ${BASE_IMAGE}:${BASE_TAG} AS base
diff --git a/docker/Makefile b/docker/Makefile
@@ -182,16 +182,16 @@ jenkins-aarch64_%: STAGE = tritondevel
 jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE)
 jenkins-rockylinux8_%: STAGE = tritondevel
 jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
-jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
+jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
 
 rockylinux8_%: STAGE = tritondevel
 rockylinux8_%: BASE_IMAGE = nvidia/cuda
-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
+rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8
 
 # For x86_64 and aarch64
 ubuntu22_%: STAGE = tritondevel
 ubuntu22_%: BASE_IMAGE = nvidia/cuda
-ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04
+ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04
 
 trtllm_%: STAGE = release
 trtllm_%: PUSH_TO_STAGING := 0
diff --git a/docker/common/install_cuda_toolkit.sh b/docker/common/install_cuda_toolkit.sh
@@ -5,7 +5,7 @@ set -ex
 # This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
 # CUDA version is usually aligned with the latest NGC CUDA image tag.
 # Only use when public CUDA image is not ready.
-CUDA_VER="12.9.0_575.51.03"
+CUDA_VER="12.9.1_575.57.08"
 CUDA_VER_SHORT="${CUDA_VER%_*}"
 
 NVCC_VERSION_OUTPUT=$(nvcc --version)
diff --git a/docker/common/install_pytorch.sh b/docker/common/install_pytorch.sh
@@ -4,7 +4,7 @@ set -ex
 
 # Use latest stable version from https://pypi.org/project/torch/#history
 # and closest to the version specified in
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
 TORCH_VERSION="2.7.1"
 SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')
 
diff --git a/docker/common/install_tensorrt.sh b/docker/common/install_tensorrt.sh
@@ -4,21 +4,20 @@ set -ex
 
 TRT_VER="10.11.0.33"
 # Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
-CUDA_VER="12.9" # 12.9.0
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
+CUDA_VER="12.9" # 12.9.1
 # Keep the installation for cuDNN if users want to install PyTorch with source codes.
 # PyTorch 2.x can compile with cuDNN v9.
-CUDNN_VER="9.10.1.4-1"
-# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
-# Use NCCL version 2.27.5 which has the fixes.
+CUDNN_VER="9.10.2.21-1"
+# PyTorch 25.06 uses NCCL 2.27.3. NCCL 2.27.5 resolves a perf regression issue.
+# Use NCCL version 2.27.5 instead.
 NCCL_VER="2.27.5-1+cuda12.9"
-# Use cuBLAS version 12.9.0.13 instead.
-CUBLAS_VER="12.9.0.13-1"
+CUBLAS_VER="12.9.1.4-1"
 # Align with the pre-installed CUDA / NVCC / NVRTC versions from
 # https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
-NVRTC_VER="12.9.41-1"
-CUDA_RUNTIME="12.9.37-1"
-CUDA_DRIVER_VERSION="575.51.03-1.el8"
+NVRTC_VER="12.9.86-1"
+CUDA_RUNTIME="12.9.79-1"
+CUDA_DRIVER_VERSION="575.57.08-1.el8"
 
 for i in "$@"; do
     case $i in
diff --git a/docs/source/reference/support-matrix.md b/docs/source/reference/support-matrix.md
@@ -142,7 +142,7 @@ The following table shows the supported software for TensorRT-LLM.
 * -
   - Software Compatibility
 * - Container
-  - [25.05](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
+  - [25.06](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
 * - TensorRT
   - [10.11](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html)
 * - Precision
diff --git a/jenkins/L0_Test.groovy b/jenkins/L0_Test.groovy
@@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
 LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312
 
 // DLFW torch image
-DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.05-py3"
+DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3"
 
 //Ubuntu base image
 UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
diff --git a/requirements.txt b/requirements.txt
@@ -22,7 +22,7 @@ h5py==3.12.1
 StrEnum
 sentencepiece>=0.1.99
 tensorrt~=10.11.0
-# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 uses 2.8.0a0.
+# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
 torch>=2.7.1,<=2.8.0a0
 torchvision
 nvidia-modelopt[torch]~=0.31.0