ROCm · mawad-amd · Sep 17, 2025 · Sep 17, 2025 · Sep 17, 2025 · Sep 18, 2025
@@ -0,0 +1,83 @@
+# Installs Iris from the triggering commit with pip and runs an externally
+# hosted test script (pinned gist revision) inside the Apptainer image.
+name: Iris External Validation Test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+# Runs are grouped per workflow and branch/PR; an in-progress run is cancelled
+# by a newer one in the same group, except on main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  build-apptainer-image:
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 90
+
+    steps:
+      # The build step below reads apptainer/iris.def from the workspace, so
+      # the repository has to be checked out first.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Apptainer
+        run: |
+          apt-get update && apt-get install -y software-properties-common
+          add-apt-repository -y ppa:apptainer/ppa
+          apt-get update && apt-get install -y apptainer
+
+      - name: Build Iris Apptainer container
+        run: |
+          # Create persistent Apptainer directory
+          mkdir -p ~/apptainer
+
+          # Build Apptainer image from definition file (only if it doesn't exist)
+          if [ ! -f ~/apptainer/iris-dev.sif ]; then
+            echo "Building new Apptainer image..."
+            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
+          else
+            echo "Using existing Apptainer image"
+          fi
+
+  external-validation-test:
+    name: External Validation Test
+    needs: build-apptainer-image
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 30
+
+    steps:
+      # No checkout in this job: Iris is installed from GitHub at github.sha and
+      # the test script is fetched from a gist pinned to a fixed revision.
+      - name: Run External Validation Test with Apptainer
+        run: |
+          apptainer exec ~/apptainer/iris-dev.sif bash -c "
+            set -e  # Exit on any error
+
+            # Setup Python
+            python3 -m pip install --upgrade pip
+
+            # Uninstall any existing Iris installations
+            echo 'Uninstalling any existing Iris packages...'
+            pip uninstall -y Iris iris || echo 'No existing Iris packages found or uninstall failed'
+            rm -rf build dist *.egg-info
+
+            # Install iris from the current repository
+            echo 'Installing iris from current repository...'
+            pip install --force-reinstall --no-deps git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
+
+            # Download test script from gist
+            echo 'Downloading test script from gist...'
+            wget -O test_iris_distributed.py https://gist.githubusercontent.com/mawad-amd/6375dc078e39e256828f379e03310ec7/raw/a527c3192bee4615292769e340b1c73676f6945a/test_iris_distributed.py
+
+            # Run the external validation test
+            echo 'Running iris external validation test...'
+            python test_iris_distributed.py
+
+            echo 'External validation test completed successfully!'
+          "
@@ -0,0 +1,95 @@
+# Installs Iris from the triggering commit with pip and runs the repository's
+# example and unit tests against it inside the Apptainer image.
+name: Iris Pip Install Test
+
+on:
+  push:
+    branches: [ main ]
+  pull_request:
+    branches: [ main ]
+  workflow_dispatch:
+
+# Runs are grouped per workflow and branch/PR; an in-progress run is cancelled
+# by a newer one in the same group, except on main.
+concurrency:
+  group: ${{ github.workflow }}-${{ github.head_ref || github.ref }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/main' }}
+
+jobs:
+  build-apptainer-image:
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 90
+
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Setup Apptainer
+        run: |
+          apt-get update && apt-get install -y software-properties-common
+          add-apt-repository -y ppa:apptainer/ppa
+          apt-get update && apt-get install -y apptainer
+
+      - name: Build Iris Apptainer container
+        run: |
+          # Create persistent Apptainer directory
+          mkdir -p ~/apptainer
+
+          # Build Apptainer image from definition file (only if it doesn't exist)
+          if [ ! -f ~/apptainer/iris-dev.sif ]; then
+            echo "Building new Apptainer image..."
+            apptainer build ~/apptainer/iris-dev.sif apptainer/iris.def
+          else
+            echo "Using existing Apptainer image"
+          fi
+
+  pip-install-test:
+    name: ${{ matrix.ranks }}-rank Pip Install Test
+    needs: [build-apptainer-image]
+    runs-on: [self-hosted, mi3008x]
+    timeout-minutes: 30
+    strategy:
+      matrix:
+        ranks: [1, 2, 4, 8]
+      # Matrix entries run one at a time rather than in parallel.
+      max-parallel: 1
+
+    steps:
+      # The test files and tests/run_tests_distributed.py executed below come
+      # from this checkout.
+      - name: Checkout repository
+        uses: actions/checkout@v4
+        with:
+          fetch-depth: 0
+
+      - name: Run Pip Install Test with Apptainer
+        run: |
+          apptainer exec ~/apptainer/iris-dev.sif bash -c "
+            set -e  # Exit on any error
+
+            # Setup Python
+            python3 -m pip install --upgrade pip
+            pip install pytest
+
+            # Uninstall any existing Iris installations
+            echo 'Uninstalling any existing Iris packages...'
+            pip uninstall -y Iris iris || echo 'No existing Iris packages found or uninstall failed'
+            rm -rf build dist *.egg-info
+
+            # Install iris from the current repository
+            pip install --force-reinstall --no-deps git+https://github.com/${{ github.repository }}.git@${{ github.sha }}
+
+            # Run examples tests one at a time using distributed wrapper
+            echo 'Running examples tests one at a time...'
+            for test_file in tests/examples/test_*.py; do
+              echo \"Testing: \$test_file with ${{ matrix.ranks }} ranks\"
+              python tests/run_tests_distributed.py --num_ranks ${{ matrix.ranks }} \"\$test_file\" -v --tb=short
+            done
+
+            # Run unit tests one at a time using distributed wrapper
+            echo 'Running unit tests one at a time...'
+            for test_file in tests/unittests/test_*.py; do
+              echo \"Testing: \$test_file with ${{ matrix.ranks }} ranks\"
+              python tests/run_tests_distributed.py --num_ranks ${{ matrix.ranks }} \"\$test_file\" -v --tb=short
+            done
+          "
@@ -40,7 +40,7 @@ jobs:
           fi
   run-tests:
     name: ${{ matrix.ranks }}-rank Iris Test
-    needs: build-apptainer-image
+    needs: [build-apptainer-image]
     runs-on: [self-hosted, mi3008x]
     timeout-minutes: 20
     strategy:
@@ -57,8 +57,12 @@ jobs:
           apptainer exec ~/apptainer/iris-dev.sif bash -c "
             set -e  # Exit on any error
 
+            # Uninstall any existing Iris installations
+            pip uninstall -y Iris iris
+            rm -rf build dist *.egg-info
+
             # Install iris first
-            pip install -e .
+            pip install -e . --force-reinstall --no-deps
 
             # Run examples tests one at a time using distributed wrapper
             echo 'Running examples tests one at a time...'

@@ -44,6 +44,9 @@ def _distributed_worker(rank, world_size, test_file, pytest_args):
         try:
             # Run pytest directly in this process
             exit_code = pytest.main([test_file] + pytest_args)
+            # A non-zero pytest result ends this worker process with that same code
+            if exit_code != 0:
+                sys.exit(exit_code)
             return exit_code
         finally:
             # Restore original argv
@@ -82,7 +85,22 @@ def main():
     print(f"args={args}, test_file={test_file}, pytest_args={pytest_args}")
 
     # Run all tests within a single distributed process group
-    mp.spawn(_distributed_worker, args=(num_ranks, test_file, pytest_args), nprocs=num_ranks, join=True)
+    try:
+        mp.spawn(
+            _distributed_worker,
+            args=(num_ranks, test_file, pytest_args),
+            nprocs=num_ranks,
+            join=True,
+        )
+    except mp.ProcessExitedException as e:
+        # A worker that calls sys.exit() surfaces here (not as SystemExit in
+        # the parent); forward its exit code so callers see pytest's status.
+        sys.exit(e.exit_code or 1)
+    except Exception as e:
+        # Any other failure (e.g. an exception raised inside a worker): report
+        # it instead of swallowing it silently, then fail.
+        print(f"Distributed test run failed: {e}", file=sys.stderr)
+        sys.exit(1)
 
 
 if __name__ == "__main__":