Skip to content
Open
72 changes: 72 additions & 0 deletions .github/workflows/iris-external-validation-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@
# Iris External Validation Test
#
# Installs Iris from the exact commit under test inside the shared Apptainer
# image, then runs a validation script downloaded from a pinned gist revision.
name: Iris External Validation Test

# Runs on pushes to main, pull requests targeting main, and manual dispatch.
on:
  push:
    branches: [main]
  pull_request:
    branches: [main]
  workflow_dispatch:

# One concurrency group per workflow and PR head ref (or pushed ref).
# In-progress runs in the group are cancelled unless the ref is main.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  build-apptainer-image:
    runs-on: [self-hosted, mi3008x]
    timeout-minutes: 90

    steps:
      # The build step reads apptainer/iris.def through a repo-relative path,
      # so the repository has to be checked out into the workspace first.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Apptainer
        run: |
          apt-get update && apt-get install -y software-properties-common
          add-apt-repository -y ppa:apptainer/ppa
          apt-get update && apt-get install -y apptainer

      # The image is stored under ~/apptainer and reused when it already
      # exists; it is only built when the .sif file is missing.
      - name: Build Iris Apptainer container
        run: |
          # Create persistent Apptainer directory
          mkdir -p ~/apptainer

          # Build Apptainer image from definition file (only if it doesn't exist)
          if [ ! -f ~/apptainer/iris-dev.sif ]; then
            echo "Building new Apptainer image..."
            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
          else
            echo "Using existing Apptainer image"
          fi

  external-validation-test:
    name: External Validation Test
    needs: build-apptainer-image
    runs-on: [self-hosted, mi3008x]
    timeout-minutes: 30

    steps:
      # No checkout here: Iris is installed straight from the repository URL at
      # the triggering commit, and the test script comes from the gist.
      # The ${{ ... }} expressions are substituted by GitHub Actions before the
      # shell sees the script.
      - name: Run External Validation Test with Apptainer
        run: |
          apptainer exec ~/apptainer/iris-dev.sif bash -c "
            set -e  # Exit on any error

            # Setup Python
            python3 -m pip install --upgrade pip

            # Uninstall any existing Iris installations
            echo 'Uninstalling any existing Iris packages...'
            pip uninstall -y Iris iris || echo 'No existing Iris packages found or uninstall failed'
            rm -rf build dist *.egg-info

            # Install iris from the current repository
            echo 'Installing iris from current repository...'
            pip install --force-reinstall --no-deps git+https://github.com/${{ github.repository }}.git@${{ github.sha }}

            # Download test script from gist
            echo 'Downloading test script from gist...'
            wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/a527c3192bee4615292769e340b1c73676f6945a/test_iris_distributed.py

            # Run the external validation test
            echo 'Running iris external validation test...'
            python test_iris_distributed.py

            echo 'External validation test completed successfully!'
          "
88 changes: 88 additions & 0 deletions .github/workflows/iris-pip-install-test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,88 @@
# Iris Pip Install Test
#
# Installs Iris with pip from the commit under test inside the shared
# Apptainer image and runs the example and unit tests at several rank counts.
name: Iris Pip Install Test

# Runs on pushes to main, pull requests targeting main, and manual dispatch.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# One concurrency group per workflow and PR head ref (or pushed ref).
# In-progress runs in the group are cancelled unless the ref is main.
concurrency:
  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}

jobs:
  build-apptainer-image:
    timeout-minutes: 90
    runs-on:
      - self-hosted
      - mi3008x

    steps:
      # Provides apptainer/iris.def for the image build below.
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Setup Apptainer
        run: |
          apt-get update && apt-get install -y software-properties-common
          add-apt-repository -y ppa:apptainer/ppa
          apt-get update && apt-get install -y apptainer

      # The image lives under ~/apptainer and is built only when the .sif
      # file is not already there.
      - name: Build Iris Apptainer container
        run: |
          # Create persistent Apptainer directory
          mkdir -p ~/apptainer

          # Build Apptainer image from definition file (only if it doesn't exist)
          if [ ! -f ~/apptainer/iris-dev.sif ]; then
            echo "Building new Apptainer image..."
            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
          else
            echo "Using existing Apptainer image"
          fi

  pip-install-test:
    name: ${{ matrix.ranks }}-rank Pip Install Test
    needs:
      - build-apptainer-image
    runs-on:
      - self-hosted
      - mi3008x
    timeout-minutes: 30
    # One job per rank count, executed one at a time.
    strategy:
      max-parallel: 1
      matrix:
        ranks:
          - 1
          - 2
          - 4
          - 8

    steps:
      # The test files and the distributed runner are taken from the checkout;
      # Iris itself is installed from the repository URL in the next step.
      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          fetch-depth: 0

      # The ${{ ... }} expressions are substituted by GitHub Actions before the
      # shell sees the script; \" and \$ are escaped so they survive the outer
      # double-quoted bash -c argument.
      - name: Run Pip Install Test with Apptainer
        run: |
          apptainer exec ~/apptainer/iris-dev.sif bash -c "
            set -e  # Exit on any error

            # Setup Python
            python3 -m pip install --upgrade pip
            pip install pytest

            # Uninstall any existing Iris installations
            echo 'Uninstalling any existing Iris packages...'
            pip uninstall -y Iris iris || echo 'No existing Iris packages found or uninstall failed'
            rm -rf build dist *.egg-info

            # Install iris from the current repository
            pip install --force-reinstall --no-deps git+https://github.com/${{ github.repository }}.git@${{ github.sha }}

            # Run examples tests one at a time using distributed wrapper
            echo 'Running examples tests one at a time...'
            for test_file in tests/examples/test_*.py; do
              echo \"Testing: \$test_file with ${{ matrix.ranks }} ranks\"
              python tests/run_tests_distributed.py --num_ranks ${{ matrix.ranks }} \"\$test_file\" -v --tb=short
            done

            # Run unit tests one at a time using distributed wrapper
            echo 'Running unit tests one at a time...'
            for test_file in tests/unittests/test_*.py; do
              echo \"Testing: \$test_file with ${{ matrix.ranks }} ranks\"
              python tests/run_tests_distributed.py --num_ranks ${{ matrix.ranks }} \"\$test_file\" -v --tb=short
            done
          "
8 changes: 6 additions & 2 deletions .github/workflows/iris-tests-apptainer.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ jobs:
fi
run-tests:
name: ${{ matrix.ranks }}-rank Iris Test
needs: build-apptainer-image
needs: [build-apptainer-image]
runs-on: [self-hosted, mi3008x]
timeout-minutes: 20
strategy:
Expand All @@ -57,8 +57,12 @@ jobs:
apptainer exec ~/apptainer/iris-dev.sif bash -c "
set -e # Exit on any error

# Uninstall any existing Iris installations
pip uninstall -y Iris iris
rm -rf build dist *.egg-info

# Install iris first
pip install -e .
pip install -e . --force-reinstall --no-deps

# Run examples tests one at a time using distributed wrapper
echo 'Running examples tests one at a time...'
Expand Down
17 changes: 16 additions & 1 deletion tests/run_tests_distributed.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,9 @@ def _distributed_worker(rank, world_size, test_file, pytest_args):
try:
# Run pytest directly in this process
exit_code = pytest.main([test_file] + pytest_args)
# If tests failed, exit with the failure code
if exit_code != 0:
sys.exit(exit_code)
return exit_code
finally:
# Restore original argv
Expand Down Expand Up @@ -82,7 +85,19 @@ def main():
print(f"args={args}, test_file={test_file}, pytest_args={pytest_args}")

# Run all tests within a single distributed process group
mp.spawn(_distributed_worker, args=(num_ranks, test_file, pytest_args), nprocs=num_ranks, join=True)
try:
mp.spawn(
_distributed_worker,
args=(num_ranks, test_file, pytest_args),
nprocs=num_ranks,
join=True,
)
except SystemExit as e:
# Catch sys.exit() from worker and return same exit code
sys.exit(e.code if isinstance(e.code, int) else 1)
except Exception:
# Any other unhandled exception = failure
sys.exit(1)


if __name__ == "__main__":
Expand Down
Loading