Skip to content

Commit 04fa6c0

Browse files
authored
[TRTLLM-6143] feat: Improve dev container tagging (#5551)
Signed-off-by: ixlmar <[email protected]>
1 parent 31699cb commit 04fa6c0

12 files changed

+409
-37
lines changed

.devcontainer/devcontainer.env

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
1+
# Environment variables used to configure the Dev Container setup.
2+
#
3+
# The syntax needs to be compatible with
4+
# https://docs.docker.com/compose/how-tos/environment-variables/variable-interpolation/#env-file-syntax
5+
#
6+
# Edit this file as necessary. For local changes not to be committed back
7+
# to the repository, create/edit devcontainer.env.user instead.
8+
HF_HOME_DEFAULT="${HOME}/.cache/huggingface"
9+
HF_HOME_XDG_DEFAULT="${XDG_CACHE_HOME:-${HF_HOME_DEFAULT}}"
10+
LOCAL_HF_HOME="${HF_HOME:-${HF_HOME_XDG_DEFAULT}}"

.devcontainer/devcontainer.json

Lines changed: 6 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,24 +3,18 @@
33
{
44
"name": "TRT-LLM Devcontainer",
55
"dockerComposeFile": [
6-
"docker-compose.yml"
6+
"docker-compose.yml",
7+
"docker-compose.override.yml"
78
],
89
"service": "tensorrt_llm-dev",
910
"remoteUser": "ubuntu",
1011
"containerEnv": {
11-
// "CCACHE_DIR" : "/home/coder/${localWorkspaceFolderBasename}/cpp/.ccache",
12-
// "CCACHE_BASEDIR" : "/home/coder/${localWorkspaceFolderBasename}",
1312
"HF_TOKEN": "${localEnv:HF_TOKEN}",
1413
"HF_HOME": "/huggingface",
1514
"HISTFILE": "${containerWorkspaceFolder}/.cache/._bash_history"
1615
},
1716
"workspaceFolder": "/workspaces/tensorrt_llm",
18-
// "workspaceFolder": "/home/coder/${localWorkspaceFolderBasename}",
19-
// "workspaceMount": "source=${localWorkspaceFolder},target=/home/coder/${localWorkspaceFolderBasename},type=bind,consistency=consistent",
20-
"mounts": [
21-
"source=${localEnv:HOME}/.cache/huggingface,target=/huggingface,type=bind", // HF cache
22-
"source=/home/scratch.trt_llm_data/,target=/home/scratch.trt_llm_data/,type=bind,consistency=consistent"
23-
],
17+
"initializeCommand": "cd ${localWorkspaceFolder} && ./.devcontainer/make_env.py",
2418
// Note: sourcing .profile is required since we use a local user and the python interpreter is
2519
// global (/usr/bin/python). In this case, pip will default to a local user path which is not
2620
// by default in the PATH. In interactive devcontainer shells, .profile is sourced by default.
@@ -43,7 +37,9 @@
4337
// "ms-vscode.cmake-tools",
4438
// Git & Github
4539
// "GitHub.vscode-pull-request-github"
46-
"eamodio.gitlens"
40+
"eamodio.gitlens",
41+
// Docs
42+
"ms-vscode.live-server"
4743
],
4844
"settings": {
4945
"C_Cpp.intelliSenseEngine": "disabled",
Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
# Example .devcontainer/docker-compose.override.yml
2+
version: "3.9"
3+
services:
4+
tensorrt_llm-dev:
5+
volumes:
6+
# Uncomment the following lines to enable
7+
# # Mount TRTLLM data volume:
8+
# - /home/scratch.trt_llm_data/:/home/scratch.trt_llm_data/:ro

.devcontainer/docker-compose.yml

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
version: "3.9"
22
services:
33
tensorrt_llm-dev:
4-
image: urm.nvidia.com/sw-tensorrt-docker/tensorrt-llm:pytorch-25.05-py3-x86_64-ubuntu24.04-trt10.11.0.33-skip-tritondevel-202506271620-5539
4+
image: ${DEV_CONTAINER_IMAGE}
55
network_mode: host
66
ipc: host
77

@@ -22,7 +22,8 @@ services:
2222
capabilities: [gpu]
2323

2424
volumes:
25-
- ..:/workspaces/tensorrt_llm:cached
25+
- ${SOURCE_DIR}:/workspaces/tensorrt_llm
26+
- ${LOCAL_HF_HOME}:/huggingface # HF cache
2627

2728
environment:
2829
- CCACHE_DIR=/workspaces/tensorrt_llm/cpp/.ccache

.devcontainer/make_env.py

Lines changed: 219 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,219 @@
1+
#!/usr/bin/env python3
2+
3+
import json
4+
import logging
5+
import os
6+
import re
7+
import shlex
8+
import subprocess
9+
import sys
10+
from pathlib import Path
11+
from tempfile import TemporaryDirectory
12+
from typing import Dict, List, Optional
13+
14+
# --- Files read or written by this script ----------------------------------
# All paths are relative to the current working directory of the script.

# Properties file which is evaluated first and provides LLM_DOCKER_IMAGE.
JENKINS_PROPS_PATH = Path("jenkins/current_image_tags.properties")
# Version-controlled Dev Container configuration.
DEV_CONTAINER_ENV_PATH = Path(".devcontainer/devcontainer.env")
# Local (git-ignored) Dev Container configuration, evaluated last.
DEV_CONTAINER_USER_ENV_PATH = Path(".devcontainer/devcontainer.env.user")
# Generated file holding the variables computed by this script.
DOT_ENV_PATH = Path(".devcontainer/.env")
# Local (git-ignored) Compose override file and the template it is copied from.
COMPOSE_OVERRIDE_PATH = Path(".devcontainer/docker-compose.override.yml")
COMPOSE_OVERRIDE_EXAMPLE_PATH = Path(
    ".devcontainer/docker-compose.override-example.yml")

# --- Names of the environment variables handled by this script -------------

HOME_DIR_VAR = "HOME_DIR"
SOURCE_DIR_VAR = "SOURCE_DIR"
DEV_CONTAINER_IMAGE_VAR = "DEV_CONTAINER_IMAGE"
BUILD_LOCAL_VAR = "BUILD_LOCAL"
JENKINS_IMAGE_VAR = "LLM_DOCKER_IMAGE"
LOCAL_HF_HOME_VAR = "LOCAL_HF_HOME"

LOGGER = logging.getLogger("make_env")
30+
31+
32+
def _load_env(env_files: List[Path]) -> Dict[str, str]:
    """Evaluate files using 'sh' and return resulting environment.

    The contents of the files are concatenated in the given order, so later
    files can refer to and override variables set by earlier ones.  The
    resulting script is executed by ``sh`` with ``set -a`` (every assignment
    is exported).  Finally, the shell replaces itself with a Python
    interpreter that prints its environment as JSON.

    Args:
        env_files: Files with shell-compatible variable assignments.

    Returns:
        The environment after evaluating all files, i.e., the environment
        inherited from this process plus the variables set by the files.

    Raises:
        FileNotFoundError: If one of the files does not exist.
        subprocess.CalledProcessError: If the shell (or the interpreter it
            executes) exits with a non-zero status.
        json.JSONDecodeError: If the evaluated files write to stdout and
            thereby corrupt the JSON document.
    """
    dump_env_code = "import json; import os; print(json.dumps(dict(os.environ)))"
    dump_env_cmd = (f"exec {shlex.quote(sys.executable or 'python3')}"
                    f" -c {shlex.quote(dump_env_code)}")

    # Reading the files here (rather than via 'cat' in a shell pipeline)
    # reports a missing file as FileNotFoundError instead of silently
    # producing no output.  Working on bytes avoids any dependency on the
    # locale encoding.
    script_parts = [b"set -a"]
    script_parts.extend(Path(env_file).read_bytes() for env_file in env_files)
    script_parts.append(os.fsencode(dump_env_cmd))

    # Joining with newlines guarantees that a file lacking a trailing newline
    # is not merged with the first line of the next file.
    proc = subprocess.run(
        ["sh"],
        input=b"\n".join(script_parts) + b"\n",
        stdout=subprocess.PIPE,
        check=True,
    )
    return json.loads(proc.stdout)
47+
48+
49+
def _detect_rootless() -> bool:
    """Run ./docker/detect_rootless.sh and interpret its output as a flag.

    Returns:
        True if the script printed a non-zero integer, False if it printed 0.

    Raises:
        subprocess.CalledProcessError: If the script exits with an error.
        ValueError: If the script output is not an integer.
    """
    result = subprocess.run(
        "./docker/detect_rootless.sh",
        shell=True,
        check=True,
        capture_output=True,
    )
    flag = result.stdout.decode("utf-8").strip()
    return int(flag) != 0
55+
56+
57+
def _handle_rootless(env_inout: Dict[str, str]):
    """Adjust HOME_DIR, SOURCE_DIR and LOCAL_HF_HOME depending on Docker mode.

    Without Docker Rootless Mode, HOME_DIR is set to the value of HOME and
    SOURCE_DIR to the current working directory.  With Rootless Mode, both
    variables must already be present in ``env_inout``; in addition, a
    LOCAL_HF_HOME starting with HOME gets that prefix replaced by HOME_DIR.

    Args:
        env_inout: Environment to validate; modified in place.

    Raises:
        ValueError: In Rootless Mode, if HOME_DIR or SOURCE_DIR is missing
            or if HF_HOME is present in the environment.
    """
    if not _detect_rootless():
        env_inout[HOME_DIR_VAR] = env_inout["HOME"]
        env_inout[SOURCE_DIR_VAR] = os.getcwd()
        return

    LOGGER.info("Docker Rootless Mode detected.")

    # Both variables have to be provided by the user (checked in this order).
    for required_var in (HOME_DIR_VAR, SOURCE_DIR_VAR):
        if required_var not in env_inout:
            raise ValueError(
                f"Docker Rootless Mode requires setting {required_var} in devcontainer.env.user"
            )

    # Handle HF_HOME
    hf_home_inherited = "HF_HOME" in os.environ
    if hf_home_inherited and "HF_HOME" in env_inout:
        raise ValueError(
            "Docker Rootless Mode requires either not setting HF_HOME at all or overriding it in devcontainer.env.user"
        )

    local_hf_home = env_inout[LOCAL_HF_HOME_VAR]
    home_prefix = env_inout["HOME"]
    if local_hf_home.startswith(home_prefix):
        # Swap the leading HOME for HOME_DIR, keep the remainder unchanged.
        remainder = local_hf_home[len(home_prefix):]
        env_inout[LOCAL_HF_HOME_VAR] = env_inout[HOME_DIR_VAR] + remainder
81+
82+
83+
def _select_prebuilt_image(env: Dict[str, str]) -> Optional[str]:
    """Return the first pre-built container image that can be run.

    Candidates are, in order of preference, the Jenkins image from ``env``
    followed by one NGC image for every release tag (``vX.Y.Z...``) merged
    into HEAD, most recently created tag first.  A candidate is accepted if
    ``docker run`` succeeds in executing ``/bin/true`` in it, pulling the
    image when it is not present locally.

    Args:
        env: Environment providing the Jenkins image URI (LLM_DOCKER_IMAGE).

    Returns:
        URI of the first usable image or None if no candidate is usable.

    Raises:
        KeyError: If ``env`` does not contain the Jenkins image variable.
        subprocess.CalledProcessError: If listing the git tags fails.
    """
    # Jenkins image
    candidate_images: List[str] = [env[JENKINS_IMAGE_VAR]]

    # NGC images
    proc = subprocess.run(
        r"git tag --sort=creatordate --merged=HEAD | grep -E '^v[0-9]+\.[0-9]+\.[0-9]+' | sed -E 's/^v(.*)$/\1/' | tac",
        shell=True,
        capture_output=True,
        check=True,
        # Without text mode, stdout is bytes and the image URI below would
        # be rendered as "...devel:b'1.2.3'".
        text=True,
    )
    for git_tag in proc.stdout.splitlines():
        git_tag = git_tag.strip()
        if not git_tag:
            continue
        candidate_images.append(f"nvcr.io/nvidia/tensorrt-llm/devel:{git_tag}")

    # Check image availability
    for candidate_image in candidate_images:
        LOGGER.info(f"Trying image {candidate_image}")

        try:
            subprocess.run(
                f"docker run --rm -it --pull=missing --entrypoint=/bin/true {shlex.quote(candidate_image)}",
                check=True,
                shell=True)
        except subprocess.CalledProcessError:
            # Image cannot be pulled or run, try the next candidate.
            continue

        LOGGER.info(f"Using image {candidate_image}")
        return candidate_image

    LOGGER.info("No pre-built image found!")
    return None
115+
116+
117+
def _parse_image_tag(build_log: str) -> Optional[str]:
    """Extract the image URI passed via --tag from a 'docker buildx build' line.

    Supports ``--tag IMAGE``, ``--tag=IMAGE`` and ``-t IMAGE``.  Returns None
    if the log contains no such command line.
    """
    # Handle escaped and actual line breaks
    for log_line in re.sub(r"\\\n", " ", build_log).splitlines():
        try:
            tokens = shlex.split(log_line)
        except ValueError:
            # Arbitrary build output (e.g., unbalanced quotes) is not
            # necessarily valid shell syntax; such lines are irrelevant.
            continue
        if tokens[:3] != ["docker", "buildx", "build"]:
            continue
        args = iter(tokens[3:])
        for arg in args:
            if arg.startswith("--tag="):
                image_uri = arg.removeprefix("--tag=")
            elif arg in ("--tag", "-t"):
                image_uri = next(args, "")
            else:
                continue
            if image_uri:
                return image_uri
            # Tag option without value, continue with the next log line.
            break
    return None


def _build_local_image() -> str:
    """Build the devel image via 'make -C docker devel_build'.

    The standard output of the build is forwarded to the standard output of
    this process and, in addition, searched for the ``docker buildx build``
    command line in order to determine the tag of the image being built.

    Returns:
        The image URI passed as ``--tag`` to ``docker buildx build``.

    Raises:
        subprocess.CalledProcessError: If the build fails.
        RuntimeError: If the image tag cannot be found in the build output.
    """
    LOGGER.info("Building container image locally")

    build_cmd = ["make", "-C", "docker", "devel_build"]
    log_lines: List[str] = []
    # Forwarding the output from Python (rather than piping it through 'tee'
    # in a shell) preserves the exit status of 'make', which would otherwise
    # be hidden by the exit status of 'tee'.
    with subprocess.Popen(build_cmd,
                          stdout=subprocess.PIPE,
                          text=True,
                          errors="replace") as proc:
        for log_line in proc.stdout:
            sys.stdout.write(log_line)
            sys.stdout.flush()
            log_lines.append(log_line)
    if proc.returncode != 0:
        raise subprocess.CalledProcessError(proc.returncode, build_cmd)

    build_log = "".join(log_lines)
    image_uri = _parse_image_tag(build_log)
    if image_uri is None:
        raise RuntimeError(
            f"Could not parse --tag argument from build log: {build_log}")
    return image_uri  # this is the image URI
150+
151+
152+
def _ensure_compose_override():
    """Create the Compose override file from the example file if missing.

    An existing override file is left untouched.
    """
    if COMPOSE_OVERRIDE_PATH.exists():
        return

    LOGGER.info(
        f"Creating initial {COMPOSE_OVERRIDE_PATH} from {COMPOSE_OVERRIDE_EXAMPLE_PATH}"
    )
    # Read first, so that nothing is created if the example file is missing.
    example_content = COMPOSE_OVERRIDE_EXAMPLE_PATH.read_bytes()
    COMPOSE_OVERRIDE_PATH.write_bytes(example_content)
159+
160+
161+
def _quote_dotenv_value(value: str) -> str:
    """Return ``value`` as single-quoted literal for a Compose env file.

    Single-quoted values are not subject to variable interpolation, which is
    desired since the values have already been evaluated.  Embedded single
    quotes are escaped with a backslash.
    """
    return "'" + value.replace("'", "\\'") + "'"


def _update_dot_env(env: Dict[str, str]):
    """Write the variables which differ from the process environment to .env.

    Args:
        env: Environment to store.  Entries whose value is identical to the
            value of the same variable in ``os.environ`` are omitted.
    """
    LOGGER.info(f"Updating {DOT_ENV_PATH}")

    output_lines = [
        "# NOTE: This file is generated by make_env.py, modify devcontainer.env.user instead of this file.\n",
        "\n",
    ]

    for env_key, env_value in env.items():
        if os.environ.get(env_key) == env_value:
            # Only storing differences w.r.t. base env
            continue
        # NOTE: Wrapping the result of shlex.quote() in double quotes would
        # make the single quotes added by shlex.quote() (e.g., for values
        # containing spaces) part of the value.
        output_lines.append(f"{env_key}={_quote_dotenv_value(env_value)}\n")

    with open(DOT_ENV_PATH, "w") as f:
        f.writelines(output_lines)
177+
178+
179+
def main():
    """Generate the .env file (and Compose override) for the Dev Container.

    Evaluates the environment files, applies the Docker Rootless Mode
    handling and determines the container image:

    * DEV_CONTAINER_IMAGE, if set, is used as is.
    * Otherwise, BUILD_LOCAL selects the strategy: if unset, a pre-built
      image is preferred and a local build is the fallback; if 0, only
      pre-built images are considered; any other integer skips the search
      for pre-built images and builds locally.

    Raises:
        RuntimeError: If no pre-built image is usable and BUILD_LOCAL is 0.
        ValueError: If BUILD_LOCAL is set but not an integer.
    """
    env_files = [
        JENKINS_PROPS_PATH,
        DEV_CONTAINER_ENV_PATH,
    ]
    # The user file holds optional local settings and is not under version
    # control, i.e., it does not exist in a fresh checkout.
    if DEV_CONTAINER_USER_ENV_PATH.exists():
        env_files.append(DEV_CONTAINER_USER_ENV_PATH)

    env = _load_env(env_files)
    _handle_rootless(env_inout=env)

    # Determine container image to use
    image_uri = env.get(DEV_CONTAINER_IMAGE_VAR)
    if image_uri:
        LOGGER.info(f"Using user-provided container image: {image_uri}")
    else:
        # Tri-state: None (unset), True (build locally), False (never build)
        build_local = bool(int(
            env[BUILD_LOCAL_VAR].strip())) if BUILD_LOCAL_VAR in env else None
        image_uri = None
        if not build_local:
            image_uri = _select_prebuilt_image(env)
        if image_uri is None:
            if build_local is False:
                raise RuntimeError(
                    "No suitable container image found and local build disabled."
                )
            image_uri = _build_local_image()
            LOGGER.info(f"Using locally built container image: {image_uri}")
        env[DEV_CONTAINER_IMAGE_VAR] = image_uri

    _ensure_compose_override()

    _update_dot_env(env)
211+
212+
213+
# Script entry point: report any failure as a single log line (without
# traceback) and signal it to the caller via the exit status.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)
    try:
        main()
    except Exception as error:
        error_kind = error.__class__.__name__
        LOGGER.error(f"{error_kind}: {error}")
        sys.exit(-1)

.gitignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,9 +59,12 @@ llm-test-workspace/
5959
# Generated files
6060
cpp/include/tensorrt_llm/executor/version.h
6161
cpp/tensorrt_llm/kernels/contextFusedMultiHeadAttention/fmha_v2_cu/
62+
.devcontainer/.env
6263

6364
# User config files
6465
CMakeUserPresets.json
6566
compile_commands.json
6667
*.bin
6768
.dir-locals.el
69+
.devcontainer/devcontainer.env.user
70+
.devcontainer/docker-compose.override.yml

docker/Makefile

Lines changed: 8 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ GROUP_ID ?= $(shell id --group)
1616
GROUP_NAME ?= $(shell id --group --name)
1717

1818
# Try to detect Docker rootless mode
19-
IS_ROOTLESS ?= $(shell if [ "$$(docker context inspect --format '{{.Endpoints.docker.Host}}' "$$(docker context show)")" = "unix:///run/user/$(USER_ID)/docker.sock" ]; then echo 1; else echo 0; fi)
19+
IS_ROOTLESS ?= $(shell ./detect_rootless.sh)
2020

2121
# Set this to 1 to add the current user to the docker image and run the container with the user
2222
LOCAL_USER ?= 0
@@ -72,7 +72,7 @@ define rewrite_tag
7272
$(shell echo $(IMAGE_WITH_TAG) | sed "s/\/tensorrt-llm:/\/tensorrt-llm-staging:/g")
7373
endef
7474

75-
%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'))
75+
%_build: DEVEL_IMAGE = $(if $(findstring 1,$(JENKINS_DEVEL)),$(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE))
7676
%_build:
7777
@echo "Building docker image: $(IMAGE_WITH_TAG)"
7878
docker buildx build $(DOCKER_BUILD_OPTS) $(DOCKER_BUILD_ARGS) \
@@ -171,15 +171,15 @@ release_%: STAGE = release
171171
release_run: WORK_DIR = /app/tensorrt_llm
172172

173173
# For x86_64
174-
jenkins_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
174+
jenkins_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE)
175175
jenkins_%: STAGE = tritondevel
176176

177177
# For aarch64
178-
jenkins-aarch64_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
178+
jenkins-aarch64_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_SBSA_DOCKER_IMAGE)
179179
jenkins-aarch64_%: STAGE = tritondevel
180180

181181
# For x86_64
182-
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell grep '^[[:space:]]*LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"')
182+
jenkins-rockylinux8_%: IMAGE_WITH_TAG = $(shell . ../jenkins/current_image_tags.properties && echo $$LLM_ROCKYLINUX8_PY312_DOCKER_IMAGE)
183183
jenkins-rockylinux8_%: STAGE = tritondevel
184184
jenkins-rockylinux8_%: BASE_IMAGE = nvidia/cuda
185185
jenkins-rockylinux8_%: BASE_TAG = 12.9.0-devel-rockylinux8
@@ -197,11 +197,11 @@ trtllm_%: STAGE = release
197197
trtllm_%: PUSH_TO_STAGING := 0
198198
trtllm_%: DEVEL_IMAGE = $(shell \
199199
if [ "$(PLATFORM)" = "amd64" ]; then \
200-
grep '^[[:space:]]*LLM_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
200+
. ../jenkins/current_image_tags.properties && echo $$LLM_DOCKER_IMAGE; \
201201
elif [ "$(PLATFORM)" = "arm64" ]; then \
202-
grep '^[[:space:]]*LLM_SBSA_DOCKER_IMAGE = ' ../jenkins/L0_MergeRequest.groovy | grep -o '".*"' | tr -d '"'; \
202+
. ../jenkins/current_image_tags.properties && echo $$LLM_SBSA_DOCKER_IMAGE; \
203203
fi)
204-
trtllm_%: IMAGE_NAME = $(shell grep '^[[:space:]]*IMAGE_NAME = ' ../jenkins/BuildDockerImage.groovy | grep -o '".*"' | tr -d '"')
204+
trtllm_%: IMAGE_NAME = $(shell . ../jenkins/current_image_tags.properties && echo $$IMAGE_NAME)
205205
trtllm_%: IMAGE_TAG = $(shell git rev-parse --abbrev-ref HEAD | tr '/' '_')-$(PLATFORM)
206206
trtllm_run: WORK_DIR = /app/tensorrt_llm
207207

docker/detect_rootless.sh

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,7 @@
1+
#!/bin/sh

# Print 1 if the host of the current Docker context is the Docker socket
# below the runtime directory of the current user, print 0 otherwise.

docker_host="$(docker context inspect --format '{{.Endpoints.docker.Host}}' "$(docker context show)")"
rootless_socket="unix:///run/user/$(id -u)/docker.sock"

if [ "$docker_host" = "$rootless_socket" ]; then
    echo 1
else
    echo 0
fi

docs/source/index.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,7 @@ Welcome to TensorRT-LLM's Documentation!
133133
reference/precision.md
134134
reference/memory.md
135135
reference/ci-overview.md
136+
reference/dev-containers.md
136137

137138

138139
.. toctree::

0 commit comments

Comments
 (0)