# Workflow "refactorings" — run #197 (captured from the GitHub Actions
# "Workflow file for this run" view; web-page chrome removed).
name: Build Wheels

# Build on manual dispatch and on pushes/PRs to the main development branches.
on:
  workflow_dispatch:
  push:
    branches:
      - master
      - release-*
      - dev
  pull_request:
    branches:
      - master
      - dev
jobs:
  # Builds one manylinux wheel per CUDA version, inside the matching NVIDIA
  # CUDA devel container, then repairs it with auditwheel (excluding the CUDA
  # libraries that are provided at runtime by the pip nvidia-* packages).
  wheel:
    name: Build wheel (CUDA ${{ matrix.cuda_version }})
    runs-on: ubuntu-24.04
    container:
      image: ${{ matrix.cuda_image }}
    strategy:
      matrix:
        include:
          - cuda_version: "12.8"
            cuda_image: "nvidia/cuda:12.8.1-cudnn-devel-ubuntu22.04"
            wheel_tag: "cu128"
            # Semicolon-separated GPU arch list, consumed via CUDAARCHS below.
            archs: "89;90;100;120"
            # Runtime pip dependencies patched into the wheel (see the
            # add_cuda_deps.py step below).
            runtime_deps: |
              nvidia-cuda-runtime-cu12>=12.8
              nvidia-cudnn-cu12>=9.0
              nvidia-cufile-cu12>=1.10
              nvidia-cublas-cu12>=12.8
              nvidia-nccl-cu12>=2.0
          - cuda_version: "13.0"
            cuda_image: "nvidia/cuda:13.0.1-cudnn-devel-ubuntu22.04"
            wheel_tag: "cu130"
            archs: "89;90;100f;120f"
            runtime_deps: |
              cuda-toolkit[cudart,cublas,cufile]>=13.0
              nvidia-cudnn-cu13>=9.0
              nvidia-nccl-cu13>=2.0
    env:
      CMAKE_GENERATOR: Ninja
      CMAKE_C_COMPILER: gcc-12
      CMAKE_CXX_COMPILER: g++-12
      CMAKE_C_COMPILER_LAUNCHER: ccache
      CMAKE_CXX_COMPILER_LAUNCHER: ccache
      CMAKE_CUDA_COMPILER_LAUNCHER: ccache
      CUDAARCHS: ${{ matrix.archs }}
    steps:
      - name: Checkout code
        uses: actions/checkout@v4
      - name: Install deps
        run: apt update && apt install -y git g++-12
      - name: ccache
        # NOTE(review): the capture showed "hendrikmuhs/[email protected]" —
        # GitHub's e-mail obfuscation of a "name@version" ref. Restored to
        # ccache-action@v1.2; confirm the version pin against the original.
        uses: hendrikmuhs/ccache-action@v1.2
        with:
          key: wheel-${{ matrix.wheel_tag }}
      - name: Install the latest version of uv
        uses: astral-sh/setup-uv@v7
      # we cannot statically declare the cuda dependencies in pyproject.toml
      # so we have to patch them here
      - name: Add CUDA runtime dependencies
        run: uv run --no-project --with tomlkit python3 .github/scripts/add_cuda_deps.py "${{ matrix.runtime_deps }}" "${{ matrix.wheel_tag }}"
      - name: Build wheel
        run: uv build --wheel
      - name: Repair wheel with auditwheel
        run: |
          uv run --no-project --with auditwheel --with patchelf auditwheel repair dist/*.whl -w wheelhouse/ --exclude libcuda.so.1 --exclude libcudart.so.12 --exclude libcudart.so.13 --exclude libcudnn.so.9 --exclude libcufile.so.0 --exclude libnccl.so.2 --exclude libcublasLt.so.12 --exclude libcublasLt.so.13 --exclude libnvidia-ml.so.1
          rm dist/*-linux_*.whl # Remove non-repaired wheel
      - name: Upload wheel
        uses: actions/upload-artifact@v4
        with:
          name: wheel-${{ matrix.wheel_tag }}
          path: wheelhouse/pyllmq*.whl
deploy-modal:
name: Deploy to Modal
needs: wheel
runs-on: ubuntu-latest
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Download wheel artifact
uses: actions/download-artifact@v4
with:
name: wheel-cu128
path: wheelhouse/
- name: Install Modal
run: pip install modal
- name: Set Modal token
run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }}
- name: Deploy to Modal
run: modal deploy scripts/modal_test_app.py
test-modal-recompute:
# These tests verify that recomputation options do not change the results at all
name: Recompute - ${{ matrix.recompute.name }} - ${{ matrix.dtype.name }}
needs: deploy-modal
runs-on: ubuntu-latest
strategy:
fail-fast: false
max-parallel: 3
matrix:
recompute:
- name: "Determinism test"
args: ""
- name: "Full Block"
args: "--recompute-block --offload-residual --use-cuda-graphs"
- name: "Blockwise"
args: "--recompute-att --recompute-ffn"
- name: "Non-linearities"
args: "--recompute-swiglu --recompute-norm"
- name: "Offload Opt"
args: "--offload-opt-m --offload-opt-v --offload-master"
- name: "Offload Gradient"
args: "--shard-gradients --offload-grads"
# While not strictly a recomputation, chunked attention should be bitwise identical, too
- name: "Chunked attention"
args: "--recompute-att --attn-bwd-chunks=2"
dtype:
- name: "BF16"
args: "--matmul-dtype=bf16"
- name: "FP8"
args: "--matmul-dtype=e4m3"
steps:
- name: Checkout code
uses: actions/checkout@v4
# Note: No need to download wheel again, it's already in the deployed image
- name: Install Modal
run: pip install modal
- name: Set Modal token
run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }}
- name: Run test on Modal
run: python3 scripts/modal_test_ci.py recompute ${{ matrix.recompute.args }} ${{ matrix.dtype.args }}
test-modal-fixed:
# These tests run a few steps and compare the resulting losses and norms against a known, fixed reference
# A failure in these tests doesn't necessarily mean that the code is broken, but it indicates that
# the changes need to be *carefully* reviewed, and tested end-to-end, before the reference is updated.
name: Test fixed - ${{ matrix.config.name }}
needs: deploy-modal
runs-on: ubuntu-latest
strategy:
fail-fast: false
max-parallel: 3
matrix:
config:
- name: "LLMQ BF16"
args: "fixed bf16"
- name: "LLMQ FP8"
args: "fixed e4m3"
- name: "LLMQ E5M2"
args: "fixed e5m2"
- name: "Torch BF16 ga=1"
args: "torch-step --grad-accum 1 --model-dtype bf16 --matmul-dtype bf16"
- name: "Torch BF16 ga=4"
args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16"
- name: "Torch FP32"
args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype fp32"
- name: "Torch AMP"
args: "torch-step --grad-accum 2 --model-dtype fp32 --matmul-dtype bf16"
- name: "Torch Chunking"
args: "torch-step --grad-accum 4 --model-dtype bf16 --matmul-dtype bf16 --lmhead-chunks 4"
steps:
- name: Checkout code
uses: actions/checkout@v4
- name: Install Modal
run: pip install modal
- name: Set Modal token
run: modal token set --token-id ${{ secrets.MODAL_TOKEN_ID }} --token-secret ${{ secrets.MODAL_TOKEN_SECRET }}
- name: Run test on Modal
run: python3 scripts/modal_test_ci.py ${{ matrix.config.args }}
release:
if: github.event_name == 'workflow_dispatch' || startsWith(github.ref, 'refs/heads/release-') || startsWith(github.ref, 'refs/tags/')
needs:
- test-modal-recompute
- test-modal-fixed
runs-on: ubuntu-latest
permissions:
contents: write
steps:
- name: Download all wheels
uses: actions/download-artifact@v4
with:
pattern: wheel-*
path: wheelhouse/
merge-multiple: true
- name: Create Release
uses: softprops/action-gh-release@v1
with:
tag_name: ${{ github.ref_name }}
files: wheelhouse/*.whl
generate_release_notes: true
draft: true