# Workflow file for the run "refactorings #197".
# NOTE: the hosting page flagged this file as containing hidden or bidirectional
# Unicode text that may be interpreted or compiled differently than it appears.
# Review it in an editor that reveals hidden Unicode characters.
# Build Wheels
#
# Pipeline overview (each job waits for the one before it via `needs`):
#   wheel                 builds one wheel per CUDA version and uploads it
#   deploy-modal          downloads the cu128 wheel and deploys the Modal test app
#   test-modal-recompute  runs the recompute test matrix through Modal
#   test-modal-fixed      runs the fixed-reference test matrix through Modal
#   release               drafts a GitHub release with all built wheels
name: Build Wheels

on:
  workflow_dispatch:
  push:
    branches:
      - master
      - "release-*"
      - dev
  pull_request:
    branches:
      - master
      - dev

jobs:
  # Builds a wheel for every CUDA version in the matrix inside the matching
  # nvidia/cuda devel container, repairs it with auditwheel and uploads it as
  # the artifact `wheel-<wheel_tag>`.
  wheel:
    name: Build wheel (CUDA ${{ matrix.cuda_version }})
    runs-on: ubuntu-24.04
    container:
      image: ${{ matrix.cuda_image }}
    strategy:
      matrix:
        # Per entry:
        #   cuda_image    container image the job runs in
        #   wheel_tag     suffix used for the ccache key and the artifact name
        #   archs         exported to the build as CUDAARCHS
        #   runtime_deps  requirement lines handed to add_cuda_deps.py
        include:
          - cuda_version: "12.8"
            cuda_image: "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04"
            wheel_tag: "cu128"
            archs: "89;90;100;120"
            runtime_deps: |
              nvidia-cuda-runtime-cu12>=12.8
              nvidia-cudnn-cu12>=9.0
              nvidia-cufile-cu12>=1.10
              nvidia-cublas-cu12>=12.8
              nvidia-nccl-cu12>=2.0
          - cuda_version: "13.0"
            cuda_image: "nvidia/cuda:13.0.1-cudnn-devel-ubuntu22.04"
            wheel_tag: "cu130"
            archs: "89;90;100f;120f"
            runtime_deps: |
              cuda-toolkit[cudart,cublas,cufile]>=13.0
              nvidia-cudnn-cu13>=9.0
              nvidia-nccl-cu13>=2.0
    env:
      CMAKE_GENERATOR: Ninja
      CMAKE_C_COMPILER: gcc-12
      CMAKE_CXX_COMPILER: g++-12
      CMAKE_C_COMPILER_LAUNCHER: ccache
      CMAKE_CXX_COMPILER_LAUNCHER: ccache
      CMAKE_CUDA_COMPILER_LAUNCHER: ccache
      CUDAARCHS: ${{ matrix.archs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install deps
        # apt-get rather than apt: apt warns that its CLI is not stable for scripts.
        run: apt-get update && apt-get install -y git g++-12

      - name: ccache
        # NOTE(review): the action reference was garbled in the reviewed copy
        # (e-mail obfuscation); restored as ccache-action@v1.2 - confirm the pin.
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: wheel-${{ matrix.wheel_tag }}

      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v7

      # we cannot statically declare the cuda dependencies in pyproject.toml
      # so we have to patch them here
      - name: Add CUDA runtime dependencies
        # The matrix values travel through the environment so that their
        # content (multi-line, may contain shell metacharacters) cannot break
        # the quoting of the command line.
        env:
          RUNTIME_DEPS: ${{ matrix.runtime_deps }}
          WHEEL_TAG: ${{ matrix.wheel_tag }}
        run: >-
          uv run --no-project --with tomlkit
          python3 .github/scripts/add_cuda_deps.py "$RUNTIME_DEPS" "$WHEEL_TAG"

      - name: Build wheel
        run: uv build --wheel

      - name: Repair wheel with auditwheel
        # Every --exclude names a library that auditwheel must not copy into
        # the wheel (presumably supplied by the runtime dependencies patched
        # in above or by the GPU driver - confirm).
        run: |
          uv run --no-project --with auditwheel --with patchelf \
            auditwheel repair dist/*.whl -w wheelhouse/ \
            --exclude libcuda.so.1 \
            --exclude libcudart.so.12 \
            --exclude libcudart.so.13 \
            --exclude libcudnn.so.9 \
            --exclude libcufile.so.0 \
            --exclude libnccl.so.2 \
            --exclude libcublasLt.so.12 \
            --exclude libcublasLt.so.13 \
            --exclude libnvidia-ml.so.1
          # Remove non-repaired wheel
          rm dist/*-linux_*.whl

      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.wheel_tag }}
          path: wheelhouse/pyllmq*.whl

  # Fetches the cu128 wheel into wheelhouse/ and deploys
  # scripts/modal_test_app.py with the Modal CLI.
  deploy-modal:
    name: Deploy to Modal
    needs: wheel
    runs-on: ubuntu-latest
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Download wheel artifact
        uses: actions/download-artifact@v4
        with:
          name: wheel-cu128
          path: wheelhouse/

      - name: Install Modal
        run: pip install modal

      - name: Set Modal token
        # Secrets are handed over as environment variables instead of being
        # expanded into the script text, so their values never become part of
        # the shell source that gets parsed.
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
        run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

      - name: Deploy to Modal
        run: modal deploy scripts/modal_test_app.py

  test-modal-recompute:
    # These tests verify that recomputation options do not change the results at all
    name: Recompute - ${{ matrix.recompute.name }} - ${{ matrix.dtype.name }}
    needs: deploy-modal
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      max-parallel: 3
      # Every `recompute` entry is combined with every `dtype` entry.
      matrix:
        recompute:
          - name: "Determinism test"
            args: ""
          - name: "Full Block"
            args: "--recompute-block --offload-residual --use-cuda-graphs"
          - name: "Blockwise"
            args: "--recompute-att --recompute-ffn"
          - name: "Non-linearities"
            args: "--recompute-swiglu --recompute-norm"
          - name: "Offload Opt"
            args: "--offload-opt-m --offload-opt-v --offload-master"
          - name: "Offload Gradient"
            args: "--shard-gradients --offload-grads"
          # While not strictly a recomputation, chunked attention should be bitwise identical, too
          - name: "Chunked attention"
            args: "--recompute-att --attn-bwd-chunks=2"
        dtype:
          - name: "BF16"
            args: "--matmul-dtype=bf16"
          - name: "FP8"
            args: "--matmul-dtype=e4m3"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      # Note: No need to download wheel again, it's already in the deployed image
      - name: Install Modal
        run: pip install modal

      - name: Set Modal token
        # Secrets go through the environment, not into the script text.
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
        run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

      - name: Run test on Modal
        # The args are expanded unquoted on purpose: each one is a
        # space-separated list of flags that the shell has to split.
        run: python3 scripts/modal_test_ci.py recompute ${{ matrix.recompute.args }} ${{ matrix.dtype.args }}

  test-modal-fixed:
    # These tests run a few steps and compare the resulting losses and norms against a known, fixed reference
    # A failure in these tests doesn't necessarily mean that the code is broken, but it indicates that
    # the changes need to be *carefully* reviewed, and tested end-to-end, before the reference is updated.
    name: Test fixed - ${{ matrix.config.name }}
    needs: deploy-modal
    runs-on: ubuntu-latest
    strategy:
      fail-fast: false
      max-parallel: 3
      matrix:
        config:
          - name: "LLMQ BF16"
            args: "fixed bf16"
          - name: "LLMQ FP8"
            args: "fixed e4m3"
          - name: "LLMQ E5M2"
            args: "fixed e5m2"
          - name: "Torch BF16 ga=1"
            args: "torch-step --grad-accum 1 --model-dtype bf16 --matmul-dtype bf16"
          - name: "Torch BF16 ga=4"
            args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16"
          - name: "Torch FP32"
            args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype fp32"
          - name: "Torch AMP"
            args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype bf16"
          - name: "Torch Chunking"
            args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16 --lmhead-chunks 4"
    steps:
      - name: Checkout code
        uses: actions/checkout@v4

      - name: Install Modal
        run: pip install modal

      - name: Set Modal token
        # Secrets go through the environment, not into the script text.
        env:
          MODAL_TOKEN_ID: ${{ secrets.MODAL_TOKEN_ID }}
          MODAL_TOKEN_SECRET: ${{ secrets.MODAL_TOKEN_SECRET }}
        run: modal token set --token-id "$MODAL_TOKEN_ID" --token-secret "$MODAL_TOKEN_SECRET"

      - name: Run test on Modal
        # Unquoted on purpose: args is a space-separated argument list.
        run: python3 scripts/modal_test_ci.py ${{ matrix.config.args }}

  # Collects every wheel-* artifact and attaches the wheels to a draft GitHub
  # release tagged with the name of the ref that triggered the run.
  release:
    # NOTE(review): none of the triggers declared in `on:` fires for tags, so
    # the refs/tags/ clause can only match a manual run started on a tag,
    # which the workflow_dispatch clause already covers - confirm whether a
    # tag trigger is missing.
    if: >-
      github.event_name == 'workflow_dispatch' ||
      startsWith(github.ref, 'refs/heads/release-') ||
      startsWith(github.ref, 'refs/tags/')
    needs:
      - test-modal-recompute
      - test-modal-fixed
    runs-on: ubuntu-latest
    permissions:
      contents: write
    steps:
      - name: Download all wheels
        uses: actions/download-artifact@v4
        with:
          pattern: wheel-*
          path: wheelhouse/
          merge-multiple: true

      - name: Create Release
        # v2 is the maintained major version of this action and accepts the
        # same inputs that were used with v1.
        uses: softprops/action-gh-release@v2
        with:
          tag_name: ${{ github.ref_name }}
          files: wheelhouse/*.whl
          generate_release_notes: true
          draft: true