Skip to content

Commit 5e282c0

Browse files
RoshaniNwstcliyu
authored and committed
Add workflow for unit tests with Pathways backend
1 parent 0a364fe commit 5e282c0

File tree

2 files changed

+120
-3
lines changed

2 files changed

+120
-3
lines changed

.github/workflows/RunTests.yml

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,19 @@ jobs:
9090
container_resource_option: "--privileged"
9191
is_scheduled_run: ${{ github.event_name == 'schedule' }}
9292

# Unit tests run through the Pathways reusable workflow.
# Depends on the `tpu_image` job.
tpu_pathways_unit_tests:
  uses: ./.github/workflows/run_pathways_tests_internal.yml
  needs: tpu_image
  with:
    # Runner selection.
    cloud_runner: linux-x86-ct4p-240-4tpu
    device_type: tpu
    device_name: v4-8
    container_resource_option: '--privileged'
    # Test selection: skip CPU-only, GPU-only and integration tests.
    pytest_marker: "not cpu_only and not gpu_only and not integration_test"
    # True only when this workflow was triggered by a `schedule` event.
    is_scheduled_run: ${{ github.event_name == 'schedule' }}
    # Forwarded to the called workflow, which declares both inputs as strings.
    xla_python_client_mem_fraction: 0.75
    tf_force_gpu_allow_growth: false
105+
93106
tpu_integration_tests:
94107
needs: tpu_image
95108
uses: ./.github/workflows/run_tests_internal.yml
@@ -131,7 +144,7 @@ jobs:
131144

132145
clean_up:
133146
if: ${{ always() }}
134-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
147+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
135148
name: "Clean up"
136149
runs-on: ["self-hosted"]
137150
permissions:
@@ -150,7 +163,7 @@ jobs:
150163

151164
notify_failure:
152165
name: Notify failed build # creates an issue or modifies last open existing issue for failed build
153-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
166+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
154167
if: ${{ always() }}
155168
runs-on: ubuntu-latest
156169
permissions:
@@ -182,7 +195,7 @@ jobs:
182195
name: Close issue after 3 successful builds
183196
# This job runs only if all the preceding test jobs succeeded
184197
if: ${{ success() && github.event.pull_request == null && github.event_name != 'workflow_dispatch' }}
185-
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests]
198+
needs: [cpu_unit_tests, gpu_unit_tests, gpu_integration_tests, tpu_unit_tests, tpu_integration_tests, tpu_pathways_unit_tests]
186199
runs-on: ubuntu-latest
187200
permissions:
188201
issues: write
.github/workflows/run_pathways_tests_internal.yml

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
# Copyright 2025 Google LLC
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# This file runs unit tests with Pathways backend.
#
# It is a reusable workflow (`workflow_call`): the test job runs inside a
# MaxText container while three service containers (resource manager, worker
# and proxy) are started next to it. JAX in the test container is pointed at
# the proxy service through JAX_PLATFORMS / JAX_BACKEND_TARGET.

name: Run Pathways Tests

on:
  workflow_call:
    inputs:
      # Used as a self-hosted runner label when `cloud_runner` is empty, and as
      # the container image tag when `image_type` is empty.
      device_type:
        required: true
        type: string
      # Used as a self-hosted runner label when `cloud_runner` is empty.
      device_name:
        required: true
        type: string
      # Optional container image tag; falls back to `device_type` when empty.
      image_type:
        required: false
        type: string
      # Marker expression handed to `pytest -m`.
      pytest_marker:
        required: true
        type: string
      # "true" keeps `scheduled_only` tests; any other value excludes them.
      is_scheduled_run:
        required: true
        type: string
      # Exported as XLA_PYTHON_CLIENT_MEM_FRACTION in the test container.
      xla_python_client_mem_fraction:
        required: true
        type: string
      # Exported as TF_FORCE_GPU_ALLOW_GROWTH in the test container.
      tf_force_gpu_allow_growth:
        required: true
        type: string
      # Passed verbatim as the test container's `options`.
      container_resource_option:
        required: true
        type: string
      # Runner label to use; when empty the job falls back to the
      # ["self-hosted", device_type, device_name] label set.
      cloud_runner:
        required: false
        type: string

jobs:
  run:
    runs-on: ${{ inputs.cloud_runner != '' && inputs.cloud_runner || fromJson(format('["self-hosted", "{0}", "{1}"]', inputs.device_type, inputs.device_name)) }}
    container:
      # The image name is keyed on the current workflow run id.
      image: gcr.io/tpu-prod-env-multipod/maxtext_${{ github.run_id }}:${{ inputs.image_type != '' && inputs.image_type || inputs.device_type }}
      volumes:
        - /home/runner/actions-runner/_work/maxtext/maxtext:/deps
      env:
        XLA_PYTHON_CLIENT_MEM_FRACTION: ${{ inputs.xla_python_client_mem_fraction }}
        TF_FORCE_GPU_ALLOW_GROWTH: ${{ inputs.tf_force_gpu_allow_growth }}
        # Set to '1' only when a 'tpu' image is used with a non-TPU device type.
        TPU_SKIP_MDS_QUERY: ${{ inputs.image_type == 'tpu' && inputs.device_type != 'tpu' && '1' || '' }}
        # Route JAX through the `proxy` service defined under `services`.
        JAX_PLATFORMS: "proxy"
        JAX_BACKEND_TARGET: "grpc://proxy:29000"
      options: ${{ inputs.container_resource_option }}
    steps:
      - uses: actions/checkout@v4
      - name: Run Tests
        # Inputs are passed through the environment instead of being expanded
        # into the script text, so their content is never parsed as shell code.
        env:
          IS_SCHEDULED_RUN: ${{ inputs.is_scheduled_run }}
          PYTEST_MARKER: ${{ inputs.pytest_marker }}
        run: |
          if [ "${IS_SCHEDULED_RUN}" = "true" ]; then
            FINAL_PYTEST_MARKER="${PYTEST_MARKER}"
          else
            FINAL_PYTEST_MARKER="${PYTEST_MARKER} and not scheduled_only"
          fi
          python3 -m pip install -e . --no-dependencies &&
          python3 -m pytest -v -m "${FINAL_PYTEST_MARKER}" --durations=0

    # NOTE(review): all three services use the floating `:latest` tag, so the
    # image contents can change between runs; consider pinning a version.
    # NOTE(review): `docker create --entrypoint` is documented as taking a single
    # executable; confirm the bracketed list form below is parsed as intended
    # by the runner.
    services:
      resource_manager:
        image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest
        ports:
          - "29001:29001"
          - "29002:29002"
        options: >-
          --entrypoint=[/usr/pathways/run/cloud_pathways_server_sanitized,
          --server_port=29001,
          --node_type=resource_manager,
          --instance_count=1,
          --instance_type=tpuv4:2x2x1,
          --gcs_scratch_location=gs://cloud-pathways-staging/tmp]
        env:
          HOST_ADDRESS: resource_manager
          # Quoted so the value is an explicit string rather than a YAML boolean.
          TPU_SKIP_MDS_QUERY: "true"

      worker:
        image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/server:latest
        ports:
          - "29005:29005"
          - "29006:29006"
          - "8471:8471"
          - "8080:8080"
        options: >-
          --privileged
          --entrypoint=[/usr/pathways/run/cloud_pathways_server_sanitized,
          --server_port=29005,
          --resource_manager_address=resource_manager:29001,
          --gcs_scratch_location=gs://cloud-pathways-staging/tmp]

      proxy:
        image: us-docker.pkg.dev/cloud-tpu-v2-images/pathways/proxy_server:latest
        ports:
          - "29000:29000"
        options: >-
          --entrypoint=[/usr/pathways/run/cloud_proxy_server_sanitized,
          --server_port=29000,
          --resource_manager_address=resource_manager:29001,
          --gcs_scratch_location=gs://cloud-pathways-staging/tmp]

0 commit comments

Comments
 (0)