Add workflow

yzheng124 · yzheng124 · commit f01ab94e24b1 · 2025-03-07T09:34:24.000Z
Signed-off-by: yzheng124 <yi.zheng@intel.com>
diff --git a/.github/reusable-steps/categorize-projects/action.yml b/.github/reusable-steps/categorize-projects/action.yml
@@ -16,6 +16,8 @@ outputs:
     value: ${{ steps.group-subprojects.outputs.qt }}
   js:
     value: ${{ steps.group-subprojects.outputs.js }}
+  unittest:
+    value: ${{ steps.group-subprojects.outputs.unittest }}
 
 runs:
   using: 'composite'
@@ -42,6 +44,8 @@ runs:
             qt+=("$dir")
           elif [ -f "$dir/main.py" ] && grep -q -- "--stream" "$dir/main.py"; then
             webcam+=("$dir")
+          elif [ -d "$dir/test" ]; then
+            unittest+=("$dir/test")
           else
             python+=("$dir")
           fi
@@ -53,13 +57,15 @@ runs:
         webcam_json=$(printf '%s\n' "${webcam[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
         qt_json=$(printf '%s\n' "${qt[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
         js_json=$(printf '%s\n' "${js[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
+        unittest_json=$(printf '%s\n' "${unittest[@]}" | jq -R -s -c 'split("\n") | map(select(length > 0))')
 
         echo "notebook=$notebook_json" >> $GITHUB_OUTPUT
         echo "python=$python_json" >> $GITHUB_OUTPUT
         echo "gradio=$gradio_json" >> $GITHUB_OUTPUT
         echo "webcam=$webcam_json" >> $GITHUB_OUTPUT
         echo "qt=$qt_json" >> $GITHUB_OUTPUT
         echo "js=$js_json" >> $GITHUB_OUTPUT
+        echo "unittest=$unittest_json" >> $GITHUB_OUTPUT  # key must match the "unittest" action output
     - name: Print subprojects to test
       shell: bash
       run: |
@@ -69,3 +75,4 @@ runs:
         echo "Webcam subprojects: ${{ steps.group-subprojects.outputs.webcam }}"
         echo "Qt subprojects: ${{ steps.group-subprojects.outputs.qt }}"
         echo "JS subprojects: ${{ steps.group-subprojects.outputs.js }}"
+        echo "Unit test subprojects: ${{ steps.group-subprojects.outputs.unittest }}"
diff --git a/.github/workflows/sanity-check-demos.yml b/.github/workflows/sanity-check-demos.yml
@@ -119,3 +119,39 @@ jobs:
           command: npm start
           project: ${{ matrix.subproject }}
           timeout: 1m
+
+  # Runs `python test.py` for each entry of the find-subprojects `unittest` output on every OS/Python combination.
+  unittest:
+    needs: find-subprojects
+    if: ${{ needs.find-subprojects.outputs.unittest != '[]' }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      fail-fast: false
+      matrix:
+        os: [ubuntu-latest, windows-latest, macos-latest]
+        python: ["3.10", "3.12"]
+        subproject: ${{ fromJson(needs.find-subprojects.outputs.unittest) }}
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/reusable-steps/setup-os
+      - name: Set up Python ${{ matrix.python }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python }}
+      - uses: ./.github/reusable-steps/setup-python
+        with:
+          python: ${{ matrix.python }}
+          project: ${{ matrix.subproject }}
+      - name: Login to HF
+        shell: bash
+        # Pass the secret through the environment instead of expanding it into the script text.
+        env:
+          HF_LOGIN_TOKEN: ${{ secrets.HF_TOKEN }}
+        run: |
+          huggingface-cli login --token "$HF_LOGIN_TOKEN"
+      - uses: ./.github/reusable-steps/timeouted-action
+        with:
+          # NOTE(review): the preceding job passes `command:` with the same project/timeout inputs; confirm against timeouted-action's declared inputs.
+          command: python test.py
+          project: ${{ matrix.subproject }}
+          timeout: 5h
diff --git a/demos/virtual_ai_assistant_demo/test/README.md b/demos/virtual_ai_assistant_demo/test/README.md
diff --git a/demos/virtual_ai_assistant_demo/test/test.py b/demos/virtual_ai_assistant_demo/test/test.py
@@ -134,7 +134,6 @@ def run_test_deepeval(chat_model_name: str, personality_file_path: Path, auth_to
         outputs.append(output)
 
     final_score = compute_deepeval_hallucination(inputs[:selection_num], outputs[:selection_num], contexts_res[:selection_num])
-    print(f"final_score is {final_score}")
     return final_score
 
 
@@ -203,7 +202,6 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
     for response_list_per_prompt in tqdm(response_list, desc="predict hallucination ratio"):
         score_list.append(check_eng.predict(response_list_per_prompt))
     final_score = float(np.mean(score_list))
-    print(f"final_score is {final_score}")
     return final_score
 
 
@@ -213,13 +211,14 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
 
     parser = argparse.ArgumentParser()
     parser.add_argument("--chat_model", type=str, default="deepseek-ai/DeepSeek-R1-Distill-Qwen-7B", help="Path/name of the chat model")
-    parser.add_argument("--personality", type=str, default="healthcare_personality.yaml", help="Path to the YAML file with chatbot personality")
+    parser.add_argument("--personality", type=str, default="../healthcare_personality.yaml", help="Path to the YAML file with chatbot personality")
     parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3")
     parser.add_argument("--check_type", type=str, choices=["deepeval", "selfcheckgpt"], default="deepeval", help="Hallucination check type")
     parser.add_argument("--selection_num", type=int, default=5, help="Maximum number of prompt are selected to compute hallucination score")
 
     args = parser.parse_args()
     if args.check_type == "deepeval":
-        run_test_deepeval(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
+        hallucination_score = run_test_deepeval(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
     else:
-        run_test_selfcheckgpt(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
+        hallucination_score = run_test_selfcheckgpt(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
+    print(f"hallucination_score for personality {args.personality}: {hallucination_score}")