From 523626709e60f30b4d955c80ade903493eb99e10 Mon Sep 17 00:00:00 2001 From: Daniel Pressler Date: Tue, 16 Sep 2025 09:55:30 +0300 Subject: [PATCH] CI: update pytorch base image version Update to a newer version to have CUDA 12.9 aligned with build servers Signed-off-by: Daniel Pressler --- .ci/dockerfiles/Dockerfile.gpu_test | 4 ++-- .ci/jenkins/lib/build-matrix.yaml | 4 ++-- .ci/jenkins/lib/test-matrix.yaml | 2 +- contrib/aws-efa/README.md | 4 ++-- contrib/aws-efa/aws_job_def.json | 2 +- contrib/aws-efa/aws_test.sh | 4 ++-- 6 files changed, 10 insertions(+), 10 deletions(-) diff --git a/.ci/dockerfiles/Dockerfile.gpu_test b/.ci/dockerfiles/Dockerfile.gpu_test index 91e18a709..b3138a363 100644 --- a/.ci/dockerfiles/Dockerfile.gpu_test +++ b/.ci/dockerfiles/Dockerfile.gpu_test @@ -13,7 +13,7 @@ # docker run --gpus all --privileged -it nixl-gpu-test # # Build arguments: -# BASE_IMAGE: Base NVIDIA PyTorch image (default: nvcr.io/nvidia/pytorch:25.02-py3) +# BASE_IMAGE: Base NVIDIA PyTorch image (default: nvcr.io/nvidia/pytorch:25.06-py3) # _UID: User ID for the non-root user (default: 148069) # _GID: Group ID for the user (default: 30) # _LOGIN: Username (default: svc-nixl) @@ -22,7 +22,7 @@ # WORKSPACE: Workspace directory path # -ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.02-py3 +ARG BASE_IMAGE=nvcr.io/nvidia/pytorch:25.06-py3 FROM ${BASE_IMAGE} diff --git a/.ci/jenkins/lib/build-matrix.yaml b/.ci/jenkins/lib/build-matrix.yaml index 304ac34ce..c741af6de 100644 --- a/.ci/jenkins/lib/build-matrix.yaml +++ b/.ci/jenkins/lib/build-matrix.yaml @@ -6,7 +6,7 @@ # Key Components: # - Job Configuration: Defines timeout, failure behavior, and Kubernetes resources # - Docker Images: Specifies the container images used for different build stages -# - PyTorch images (24.10 and 25.02) for building and testing +# - PyTorch images (24.10 and 25.06) for building and testing # - Podman image for container builds # - Matrix Axes: Defines build variations (currently x86_64 architecture) 
# - Build Steps: Sequential steps for building, testing, and container creation @@ -34,7 +34,7 @@ kubernetes: requests: "{memory: 8Gi, cpu: 8000m}" runs_on_dockers: - - { name: "ubuntu24.04-pytorch", url: "nvcr.io/nvidia/pytorch:25.02-py3" } + - { name: "ubuntu24.04-pytorch", url: "nvcr.io/nvidia/pytorch:25.06-py3" } - { name: "ubuntu22.04-pytorch", url: "nvcr.io/nvidia/pytorch:24.10-py3" } - { name: "podman-v5.0.2", url: "quay.io/podman/stable:v5.0.2", category: 'tool', privileged: true } diff --git a/.ci/jenkins/lib/test-matrix.yaml b/.ci/jenkins/lib/test-matrix.yaml index 4a543f7dd..141f8d22b 100644 --- a/.ci/jenkins/lib/test-matrix.yaml +++ b/.ci/jenkins/lib/test-matrix.yaml @@ -30,7 +30,7 @@ runs_on_agents: matrix: axes: image: - - nvcr.io/nvidia/pytorch:25.02-py3 + - nvcr.io/nvidia/pytorch:25.06-py3 arch: - x86_64 diff --git a/contrib/aws-efa/README.md b/contrib/aws-efa/README.md index 829b05dd8..bcaa3d2d9 100644 --- a/contrib/aws-efa/README.md +++ b/contrib/aws-efa/README.md @@ -65,9 +65,9 @@ The AWS test script: ## Container Image -The script uses the container image: `nvcr.io/nvidia/pytorch:25.02-py3` +The script uses the container image: `nvcr.io/nvidia/pytorch:25.06-py3` You can override this by setting the `CONTAINER_IMAGE` environment variable: ```bash export CONTAINER_IMAGE="your-custom-image:tag" -``` \ No newline at end of file +``` diff --git a/contrib/aws-efa/aws_job_def.json b/contrib/aws-efa/aws_job_def.json index ccebe4bd1..086336777 100644 --- a/contrib/aws-efa/aws_job_def.json +++ b/contrib/aws-efa/aws_job_def.json @@ -15,7 +15,7 @@ "imagePullSecrets": [], "containers": [ { - "image": "nvcr.io/nvidia/pytorch:25.02-py3", + "image": "nvcr.io/nvidia/pytorch:25.06-py3", "command": [ "/bin/bash", "-c", diff --git a/contrib/aws-efa/aws_test.sh b/contrib/aws-efa/aws_test.sh index d91c40421..3492f2bbb 100755 --- a/contrib/aws-efa/aws_test.sh +++ b/contrib/aws-efa/aws_test.sh @@ -30,7 +30,7 @@ usage() { echo " GITHUB_REPOSITORY - GitHub repository 
(e.g., \"ai-dynamo/nixl\")" echo "" echo "Optional environment variables:" - echo " CONTAINER_IMAGE - Container image to use (default: nvcr.io/nvidia/pytorch:25.02-py3)" + echo " CONTAINER_IMAGE - Container image to use (default: nvcr.io/nvidia/pytorch:25.06-py3)" echo " TEST_TIMEOUT - Timeout for test execution in minutes" exit 1 } @@ -47,7 +47,7 @@ if [ -z "$GITHUB_REF" ] || [ -z "$GITHUB_SERVER_URL" ] || [ -z "$GITHUB_REPOSITO fi test_cmd="$1" -export CONTAINER_IMAGE=${CONTAINER_IMAGE:-"nvcr.io/nvidia/pytorch:25.02-py3"} +export CONTAINER_IMAGE=${CONTAINER_IMAGE:-"nvcr.io/nvidia/pytorch:25.06-py3"} # Set Git checkout command based on GITHUB_REF case "$GITHUB_REF" in