5 files changed: +144, -16 lines.
New workflow file, "Run 1xH100 Tests":

```yaml
name: Run 1xH100 Tests

on:
  push:
    branches:
      - main
      - 'gh/**'
  pull_request:
    branches:
      - main
      - 'gh/**'

concurrency:
  group: 1xH100_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  test:
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: H100
            runs-on: linux.aws.h100
            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.4"
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create -n venv python=3.9 -y
        conda activate venv
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        python -m pip install --upgrade pip
        pip install uv
        pip install ${{ matrix.torch-spec }}
        uv pip install -r dev-requirements.txt
        uv pip install vllm
        pip install .
        pytest test/integration --verbose -s
        pytest test/dtypes/test_affine_quantized_float.py --verbose -s
        ./test/float8/test_everything_single_gpu.sh
```
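For local debugging, the single-GPU job's `script` block can be approximated by hand. A minimal sketch, assuming a CUDA machine with conda available and the same nightly index as the matrix entry above; the environment name `venv` is only illustrative:

```bash
# Approximate local reproduction of the 1xH100 job's script block,
# run from the repository root.
conda create -n venv python=3.9 -y
conda activate venv
python -m pip install --upgrade pip
pip install uv
pip install --pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126
uv pip install -r dev-requirements.txt
uv pip install vllm
pip install .  # install torchao from the checkout
pytest test/integration --verbose -s
pytest test/dtypes/test_affine_quantized_float.py --verbose -s
./test/float8/test_everything_single_gpu.sh
```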
Modified workflow file, renamed from "Run Float8 Tests" to "Run 1xL4 Tests" (unchanged regions elided):

```diff
-name: Run Float8 Tests
+name: Run 1xL4 Tests
...
 concurrency:
-  group: float8_test-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
+  group: 1xL4_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
   cancel-in-progress: true
...
             torch-spec: '--pre torch --index-url https://download.pytorch.org/whl/nightly/cu126'
             gpu-arch-type: "cuda"
             gpu-arch-version: "12.6"
-          - name: H100
-            runs-on: linux.aws.h100.4
-            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
-            gpu-arch-type: "cuda"
-            gpu-arch-version: "12.4"
     permissions:
       id-token: write
       contents: read
...
         uv pip install -r dev-requirements.txt
         uv pip install vllm
         pip install .
-        pytest test/float8 --verbose -s
         pytest test/integration --verbose -s
         pytest test/dtypes/test_affine_quantized_float.py --verbose -s
-        GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l)
-        if [ "$GPU_COUNT" -ge 4 ]; then
-          echo "Found $GPU_COUNT GPUs - running test_everything.sh"
-          ./test/float8/test_everything.sh
-        else
-          echo "Only $GPU_COUNT GPUs available. Need at least 4 GPUs to run test_everything.sh"
-          exit 0
-        fi
+        ./test/float8/test_everything_single_gpu.sh
```
New workflow file, "Run 4xH100 tests":

```yaml
name: Run 4xH100 tests

on:
  push:
    branches:
      - main
      - 'gh/**'
  pull_request:
    branches:
      - main
      - 'gh/**'

concurrency:
  group: 4xH100_tests-${{ github.workflow }}-${{ github.ref == 'refs/heads/main' && github.run_number || github.ref }}
  cancel-in-progress: true

env:
  HF_TOKEN: ${{ secrets.HF_TOKEN }}

jobs:
  test:
    strategy:
      fail-fast: false
      matrix:
        include:
          - name: H100
            runs-on: linux.aws.h100.4
            torch-spec: '--pre torch torchvision torchaudio --index-url https://download.pytorch.org/whl/nightly/cu126'
            gpu-arch-type: "cuda"
            gpu-arch-version: "12.4"
    permissions:
      id-token: write
      contents: read
    uses: pytorch/test-infra/.github/workflows/linux_job_v2.yml@main
    with:
      timeout: 60
      runner: ${{ matrix.runs-on }}
      gpu-arch-type: ${{ matrix.gpu-arch-type }}
      gpu-arch-version: ${{ matrix.gpu-arch-version }}
      submodules: recursive
      script: |
        conda create -n venv python=3.9 -y
        conda activate venv
        export PATH=/opt/rh/devtoolset-10/root/usr/bin/:$PATH
        python -m pip install --upgrade pip
        pip install uv
        pip install ${{ matrix.torch-spec }}
        uv pip install -r dev-requirements.txt
        uv pip install vllm
        pip install .
        ./test/float8/test_everything_multi_gpu.sh
```
New file, test/float8/test_everything_multi_gpu.sh:

```bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD 3-Clause license found in the
# LICENSE file in the root directory of this source tree.
#!/bin/bash

# terminate script on first error
set -e
IS_ROCM=$(rocm-smi --version || true)

# These tests do not work on ROCm yet
if [ -z "$IS_ROCM" ]
then
    ./test/float8/test_fsdp.sh
    ./test/float8/test_fsdp_compile.sh
    ./test/float8/test_dtensor.sh
    python test/float8/test_fsdp2/test_fsdp2.py
fi

echo "all multi gpu tests successful"
```
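When running this suite by hand on a shared box, the GPU-count gate removed from the old workflow (see the diff above) can still be useful. A hedged sketch of a local wrapper, assuming an NVIDIA machine with `nvidia-smi` on PATH; the wrapper itself is hypothetical and not part of this PR:

```bash
#!/bin/bash
# Hypothetical local wrapper: only launch the multi-GPU float8 suite when
# at least 4 GPUs are visible, mirroring the check removed from the old workflow.
GPU_COUNT=$(nvidia-smi -L 2>/dev/null | wc -l)
if [ "$GPU_COUNT" -ge 4 ]; then
  echo "Found $GPU_COUNT GPUs - running test_everything_multi_gpu.sh"
  ./test/float8/test_everything_multi_gpu.sh
else
  echo "Only $GPU_COUNT GPUs available. Need at least 4 GPUs."
fi
```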
New file, test/float8/test_everything_single_gpu.sh:

```bash
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD 3-Clause license found in the
# LICENSE file in the root directory of this source tree.
#!/bin/bash

# terminate script on first error
set -e

pytest test/float8/test_base.py --verbose -s
pytest test/float8/test_compile.py --verbose -s
pytest test/float8/test_numerics_integration.py --verbose -s
pytest test/float8/test_auto_filter.py --verbose -s

echo "all float8 single gpu tests successful"
```