Skip to content
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ TensorRT-LLM
[![Documentation](https://img.shields.io/badge/docs-latest-brightgreen.svg?style=flat)](https://nvidia.github.io/TensorRT-LLM/)
[![python](https://img.shields.io/badge/python-3.12-green)](https://www.python.org/downloads/release/python-3123/)
[![python](https://img.shields.io/badge/python-3.10-green)](https://www.python.org/downloads/release/python-31012/)
[![cuda](https://img.shields.io/badge/cuda-12.9.0-green)](https://developer.nvidia.com/cuda-downloads)
[![cuda](https://img.shields.io/badge/cuda-12.9.1-green)](https://developer.nvidia.com/cuda-downloads)
[![trt](https://img.shields.io/badge/TRT-10.11.0-green)](https://developer.nvidia.com/tensorrt)
[![version](https://img.shields.io/badge/release-1.0.0rc5-green)](./tensorrt_llm/version.py)
[![license](https://img.shields.io/badge/license-Apache%202-blue)](./LICENSE)
Expand Down
10 changes: 1 addition & 9 deletions constraints.txt
Original file line number Diff line number Diff line change
@@ -1,13 +1,5 @@
# These vulnerabilities were inherited from the base image (pytorch:25.05-py3) and should be removed when the base image
# These vulnerabilities were inherited from the base image (pytorch:25.06-py3) and should be removed when the base image
# is updated.

# WAR against https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
h11>=0.16.0
# WAR against https://github.com/advisories/GHSA-7cx3-6m66-7c5m
tornado>=6.5.0
# WAR against https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
setuptools>=78.1.1
# WAR against https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
protobuf>=4.25.8
# WAR against https://github.com/advisories/GHSA-33p9-3p43-82vq
jupyter-core>=5.8.1
16 changes: 4 additions & 12 deletions docker/Dockerfile.multi
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
# Multi-stage Dockerfile
ARG BASE_IMAGE=nvcr.io/nvidia/pytorch
ARG TRITON_IMAGE=nvcr.io/nvidia/tritonserver
ARG BASE_TAG=25.05-py3
ARG TRITON_BASE_TAG=25.05-py3
ARG BASE_TAG=25.06-py3
ARG TRITON_BASE_TAG=25.06-py3
ARG DEVEL_IMAGE=devel

FROM ${BASE_IMAGE}:${BASE_TAG} AS base
Expand Down Expand Up @@ -74,18 +74,10 @@ ENV PYTORCH_CUDA_ALLOC_CONF="garbage_collection_threshold:0.99999"
RUN pip3 uninstall -y opencv && rm -rf /usr/local/lib/python3*/dist-packages/cv2/
RUN pip3 install opencv-python-headless --force-reinstall --no-deps --no-cache-dir

# WARs against security issues inherited from pytorch:25.04
# * https://github.com/advisories/GHSA-vqfr-h8mv-ghfj
# * https://github.com/advisories/GHSA-7cx3-6m66-7c5m
# * https://github.com/advisories/GHSA-5rjg-fvgr-3xxf
# WARs against security issues inherited from pytorch:25.06
# * https://github.com/advisories/GHSA-8qvm-5x2c-j2w7
# * https://github.com/advisories/GHSA-33p9-3p43-82vq
RUN pip3 install --upgrade --no-cache-dir \
"h11>=0.16" \
"tornado>=6.5.0" \
"setuptools>=78.1.1,<80" \
"protobuf>=4.25.8" \
"jupyter-core>=5.8.1"
"protobuf>=4.25.8"

FROM ${TRITON_IMAGE}:${TRITON_BASE_TAG} AS triton

Expand Down
6 changes: 3 additions & 3 deletions docker/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -186,16 +186,16 @@ jenkins-rockylinux8_%: PYTHON_VERSION_TAG_ID = $(if $(findstring 3.12,${PYTHON_V
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_${PYTHON_VERSION_TAG_ID}_DOCKER_IMAGE)
jenkins-rockylinux8_%: STAGE = tritondevel
jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
jenkins-rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8

rockylinux8_%: STAGE = tritondevel
rockylinux8_%: BASE_IMAGE = nvidia/cuda
rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
rockylinux8_%: BASE_TAG = 12.9.1-devel-rockylinux8

# For x86_64 and aarch64
ubuntu22_%: STAGE = tritondevel
ubuntu22_%: BASE_IMAGE = nvidia/cuda
ubuntu22_%: BASE_TAG = 12.9.0-devel-ubuntu22.04
ubuntu22_%: BASE_TAG = 12.9.1-devel-ubuntu22.04

trtllm_%: STAGE = release
trtllm_%: PUSH_TO_STAGING := 0
Expand Down
3 changes: 2 additions & 1 deletion docker/common/install_base.sh
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,8 @@ cleanup() {

init_ubuntu() {
apt-get update
# libibverbs-dev is installed but libmlx5.so is missing, reinstall the package
apt-get --reinstall install -y libibverbs-dev
apt-get install -y --no-install-recommends \
ccache \
gdb \
Expand All @@ -53,7 +55,6 @@ init_ubuntu() {
llvm \
libclang-rt-dev \
libffi-dev \
libibverbs-dev \
libnuma1 \
libnuma-dev \
python3-dev \
Expand Down
2 changes: 1 addition & 1 deletion docker/common/install_cuda_toolkit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ set -ex
# This script is used for reinstalling CUDA on Rocky Linux 8 with the run file.
# CUDA version is usually aligned with the latest NGC CUDA image tag.
# Only use when public CUDA image is not ready.
CUDA_VER="12.9.0_575.51.03"
CUDA_VER="12.9.1_575.57.08"
CUDA_VER_SHORT="${CUDA_VER%_*}"

NVCC_VERSION_OUTPUT=$(nvcc --version)
Expand Down
2 changes: 1 addition & 1 deletion docker/common/install_pytorch.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ set -ex

# Use latest stable version from https://pypi.org/project/torch/#history
# and closest to the version specified in
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
TORCH_VERSION="2.7.1"
SYSTEM_ID=$(grep -oP '(?<=^ID=).+' /etc/os-release | tr -d '"')

Expand Down
19 changes: 10 additions & 9 deletions docker/common/install_tensorrt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,21 +4,22 @@ set -ex

TRT_VER="10.11.0.33"
# Align with the pre-installed cuDNN / cuBLAS / NCCL versions from
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05
CUDA_VER="12.9" # 12.9.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06
CUDA_VER="12.9" # 12.9.1
# Keep the installation for cuDNN if users want to install PyTorch with source codes.
# PyTorch 2.x can compile with cuDNN v9.
CUDNN_VER="9.10.1.4-1"
# NCCL version 2.26.x used in the NGC PyTorch 25.05 image but has a performance regression issue.
# Use NCCL version 2.27.5 which has the fixes.
CUDNN_VER="9.10.2.21-1"
# NGC PyTorch 25.06 image uses NCCL 2.27.3, while NCCL 2.27.5 resolves a perf regression issue.
# Use NCCL version 2.27.5 instead.
NCCL_VER="2.27.5-1+cuda12.9"
# Use cuBLAS version 12.9.0.13 instead.
# NGC PyTorch 25.06 image uses cuBLAS 12.9.1.4, but which leads to failures with MoE Lora (see https://nvbugs/5376270).
# Continue using cuBLAS 12.9.0.13 until this issue is resolved.
CUBLAS_VER="12.9.0.13-1"
# Align with the pre-installed CUDA / NVCC / NVRTC versions from
# https://docs.nvidia.com/cuda/cuda-toolkit-release-notes/index.html
NVRTC_VER="12.9.41-1"
CUDA_RUNTIME="12.9.37-1"
CUDA_DRIVER_VERSION="575.51.03-1.el8"
NVRTC_VER="12.9.86-1"
CUDA_RUNTIME="12.9.79-1"
CUDA_DRIVER_VERSION="575.57.08-1.el8"

for i in "$@"; do
case $i in
Expand Down
2 changes: 1 addition & 1 deletion docs/source/reference/support-matrix.md
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ The following table shows the supported software for TensorRT-LLM.
* -
- Software Compatibility
* - Container
- [25.05](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
- [25.06](https://docs.nvidia.com/deeplearning/frameworks/support-matrix/index.html)
* - TensorRT
- [10.11](https://docs.nvidia.com/deeplearning/tensorrt/release-notes/index.html)
* - Precision
Expand Down
4 changes: 3 additions & 1 deletion jenkins/Build.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -446,7 +446,9 @@ def runLLMBuild(pipeline, buildFlags, tarName, is_linux_x86_64)
// Build tritonserver artifacts
def llmPath = sh (script: "realpath ${LLM_ROOT}",returnStdout: true).trim()
// TODO: Remove after the cmake version is upgraded to 3.31.8
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"
// Get triton tag from docker/dockerfile.multi
def tritonShortTag = sh(script: "${LLM_ROOT}/jenkins/scripts/get_triton_tag.sh ${LLM_ROOT}", returnStdout: true).trim()
sh "cd ${LLM_ROOT}/triton_backend/inflight_batcher_llm && mkdir build && cd build && cmake .. -DTRTLLM_DIR=${llmPath} -DTRITON_COMMON_REPO_TAG=${tritonShortTag} -DTRITON_CORE_REPO_TAG=${tritonShortTag} -DTRITON_THIRD_PARTY_REPO_TAG=${tritonShortTag} -DTRITON_BACKEND_REPO_TAG=${tritonShortTag} -DUSE_CXX11_ABI=ON && make -j${BUILD_JOBS} install"

// Step 3: packaging wheels into tarfile
sh "cp ${LLM_ROOT}/build/tensorrt_llm-*.whl TensorRT-LLM/"
Expand Down
2 changes: 1 addition & 1 deletion jenkins/L0_Test.groovy
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE = env.wheelDockerImagePy310
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = env.wheelDockerImagePy312

// DLFW torch image
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.05-py3"
DLFW_IMAGE = "nvcr.io/nvidia/pytorch:25.06-py3"

//Ubuntu base image
UBUNTU_22_04_IMAGE = "urm.nvidia.com/docker/ubuntu:22.04"
Expand Down
2 changes: 1 addition & 1 deletion jenkins/controlCCache.groovy
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@

import java.lang.InterruptedException

DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507071100-5534"
DOCKER_IMAGE = "urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678"

def createKubernetesPodConfig(image, arch = "amd64")
{
Expand Down
8 changes: 4 additions & 4 deletions jenkins/current_image_tags.properties
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
#
# NB: Typically, the suffix indicates the PR whose CI pipeline generated the images. In case that
# images are adopted from PostMerge pipelines, the abbreviated commit hash is used instead.
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.0-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507162011-ec3ebae
LLM_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678
LLM_SBSA_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.06-py3-aarch64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202507251001-5678
LLM_ROCKYLINUX8_PY310_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py310-trt10.11.0.33-skip-tritondevel-202507251001-5678
LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE=urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:cuda-12.9.1-devel-rocky8-x86_64-rocky8-py312-trt10.11.0.33-skip-tritondevel-202507251001-5678
37 changes: 37 additions & 0 deletions jenkins/scripts/get_triton_tag.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
#!/bin/bash

# Determine the Triton "short tag" (e.g. "r25.06") used for the
# TRITON_*_REPO_TAG cmake options, by parsing docker/Dockerfile.multi.
#
# Usage:  ./get_triton_tag.sh [llm_root_path]
# Stdout: the Triton short tag; falls back to "main" when the tag
#         cannot be determined. Diagnostics go to stderr only, so
#         callers can safely capture stdout.

set -e

#######################################
# Resolve and print the Triton short tag.
# Arguments: $1 - LLM repo root (defaults to ".")
# Outputs:   short tag on stdout; diagnostics on stderr
# Returns:   0 always (falls back to "main" on any lookup failure)
#######################################
main() {
  local llm_root="${1:-.}"
  # Default used when Dockerfile.multi is missing or has no usable tag.
  local triton_short_tag="main"
  local dockerfile="${llm_root}/docker/Dockerfile.multi"

  if [[ -f "$dockerfile" ]]; then
    # If the ARG is declared more than once, the last declaration wins.
    local tag_line
    tag_line=$(grep -E '^ARG TRITON_BASE_TAG=' "$dockerfile" | tail -n1) || true

    if [[ -n "$tag_line" ]]; then
      # Take everything after the first '='. Unlike `cut -d'=' -f2`,
      # this does not truncate a value that itself contains '='.
      local base_tag="${tag_line#*=}"

      if [[ -n "$base_tag" ]]; then
        # e.g. "25.06-py3" -> "r25.06": drop the -py3 suffix, prefix 'r'.
        triton_short_tag="r${base_tag%-py3}"
        echo "Using triton tag from Dockerfile.multi: $triton_short_tag" >&2
      fi
    fi
  else
    echo "Dockerfile.multi not found at $dockerfile" >&2
  fi

  # The short tag is the only thing written to stdout.
  echo "$triton_short_tag"
}

main "$@"
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ h5py==3.12.1
StrEnum
sentencepiece>=0.1.99
tensorrt~=10.11.0
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-05.html#rel-25-05 uses 2.8.0a0.
# https://docs.nvidia.com/deeplearning/frameworks/pytorch-release-notes/rel-25-06.html#rel-25-06 uses 2.8.0a0.
torch>=2.7.1,<=2.8.0a0
torchvision
nvidia-modelopt[torch]~=0.33.0
Expand Down
2 changes: 0 additions & 2 deletions tests/integration/defs/test_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -1984,8 +1984,6 @@ def test_ptp_quickstart_multimodal(llm_root, llm_venv, model_name, model_path,
modality, use_cuda_graph):
# NOTE: individual tests need to be enabled in
# tests/integration/test_lists/qa/examples_test_list.txt
llm_venv.run_cmd(
['-m', 'pip', 'install', 'flash-attn==2.7.3', '--no-build-isolation'])

example_root = Path(os.path.join(llm_root, "examples", "llm-api"))
test_data_root = Path(
Expand Down
19 changes: 18 additions & 1 deletion tests/integration/defs/triton_server/test_triton.py
Original file line number Diff line number Diff line change
Expand Up @@ -506,9 +506,26 @@ def test_cpp_unit_tests(tritonserver_test_root, test_name, llm_root):
"rm -rf build && "
"mkdir -p build", llm_root)

# Get the value of TRITON_SHORT_TAG from docker/Dockerfile.multi
import subprocess
try:
triton_short_tag = subprocess.check_output(
[f"{llm_root}/jenkins/scripts/get_triton_tag.sh", llm_root],
text=True).strip()
print(
f"using triton tag from docker/Dockerfile.multi: {triton_short_tag}"
)
except (subprocess.CalledProcessError, FileNotFoundError):
triton_short_tag = "main"
print("using default triton tag: main")
run_shell_command(
f"cd {llm_root}/triton_backend/inflight_batcher_llm/build && "
f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ -DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 "
f"cmake .. -DTRTLLM_DIR={llm_root} -DCMAKE_INSTALL_PREFIX=install/ "
f"-DBUILD_TESTS=ON -DUSE_CXX11_ABI=ON "
f"-DTRITON_COMMON_REPO_TAG={triton_short_tag} "
f"-DTRITON_CORE_REPO_TAG={triton_short_tag} "
f"-DTRITON_THIRD_PARTY_REPO_TAG={triton_short_tag} "
f"-DTRITON_BACKEND_REPO_TAG={triton_short_tag} "
"&& make -j8 install", llm_root)

# Run the cpp unit tests
Expand Down
5 changes: 4 additions & 1 deletion triton_backend/inflight_batcher_llm/scripts/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,10 @@ if [[ "$BUILD_UNIT_TESTS" == "true" ]]; then
fi

# TODO: Remove specifying Triton version after cmake version is upgraded to 3.31.8
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=r25.05 -DTRITON_CORE_REPO_TAG=r25.05 -DTRITON_THIRD_PARTY_REPO_TAG=r25.05 -DTRITON_BACKEND_REPO_TAG=r25.05 ..
# Get TRITON_SHORT_TAG from docker/Dockerfile.multi
LLM_ROOT="$(dirname $0)/../../../.."
TRITON_SHORT_TAG=$("$LLM_ROOT/jenkins/scripts/get_triton_tag.sh" "$LLM_ROOT")
cmake -DCMAKE_INSTALL_PREFIX:PATH=`pwd`/install ${BUILD_TESTS_ARG} -DTRITON_COMMON_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_CORE_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_THIRD_PARTY_REPO_TAG=${TRITON_SHORT_TAG} -DTRITON_BACKEND_REPO_TAG=${TRITON_SHORT_TAG} ..
make install

mkdir -p /opt/tritonserver/backends/tensorrtllm
Expand Down
Loading