# Maps a chatbot personality YAML filename to the dataset used to probe it for
# hallucinations:
#   "name"  - a HuggingFace dataset id OR a path to a local .txt file of prompts
#             (consumers branch on the ".txt" suffix),
#   "split" - dataset split to load (only for HuggingFace datasets),
#   "col"   - column/field holding the question text.
DATASET_MAPPING = {
    "agribot_personality.yaml": {"name": "KisanVaani/agriculture-qa-english-only", "split": "train", "col": "question"},
    "healthcare_personality.yaml": {"name": "medalpaca/medical_meadow_medical_flashcards", "split": "train", "col": "input"},
    # Local prompt files shipped next to this script (no "split": read as plain text).
    "bartender_personality.yaml": {"name": str(Path(__file__).parent / "bartender_personality.txt"), "col": "text"},
    "culinara_personality.yaml": {"name": str(Path(__file__).parent / "culinara_personality.txt"), "col": "text"},
    "tutor_personality.yaml": {"name": str(Path(__file__).parent / "tutor_personality.txt"), "col": "text"},
}
# Directory where converted/downloaded models are stored.
MODEL_DIR = Path("model")
3638
@@ -56,12 +58,8 @@ def compute_deepeval_hallucination(inputs, outputs, contexts) -> float:
5658 return avg_score
5759
5860
59- def extract_personality_path (path ):
60- return os .path .basename (path )
61-
62-
63- def prepare_dataset_and_model (chat_model_name , personality_file_path , auth_token ):
64- dataset_info = DATASET_MAPPING .get (extract_personality_path (personality_file_path ), "" )
61+ def prepare_dataset_and_model (chat_model_name : str , personality_file_path : Path , auth_token : str ):
62+ dataset_info = DATASET_MAPPING .get (personality_file_path .name , "" )
6563 assert dataset_info != ""
6664 log .info ("Loading dataset" )
6765 if dataset_info ["name" ].endswith (".txt" ):
@@ -113,26 +111,31 @@ def load_chat_model(model_name: str, token: str = None) -> OpenVINOLLM:
113111 model_kwargs = {"ov_config" : ov_config , "library_name" : "transformers" }, generate_kwargs = {"do_sample" : True , "temperature" : 0.7 , "top_k" : 50 , "top_p" : 0.95 })
114112
115113
def run_test_deepeval(chat_model_name: str, personality_file_path: Path, auth_token: str, selection_num: int = 10) -> float:
    """Run a DeepEval hallucination check on a personality chatbot.

    Builds the chat engine and question set via prepare_dataset_and_model,
    generates an answer for up to ``selection_num`` questions, and scores the
    answers with compute_deepeval_hallucination.

    Args:
        chat_model_name (str): large language model path.
        personality_file_path (Path): personality YAML file path (its name keys
            into DATASET_MAPPING).
        auth_token (str): auth token used for HuggingFace.
        selection_num (int): maximum number of prompts selected to compute the
            hallucination score.

    Returns:
        float: hallucination score; the higher the score, the higher the
        possibility of a hallucination issue.
    """
    dataset_question, ov_chat_engine = prepare_dataset_and_model(chat_model_name, personality_file_path, auth_token)
    inputs = dataset_question
    # We use question as context because the dataset lacks context
    contexts = dataset_question
    contexts_res = [[context] for context in contexts]

    # Generate one chatbot response per selected prompt.
    outputs = []
    for input in tqdm(inputs[:selection_num]):
        output = ov_chat_engine.chat(input).response
        outputs.append(output)

    final_score = compute_deepeval_hallucination(inputs[:selection_num], outputs[:selection_num], contexts_res[:selection_num])
    print(f"final_score is {final_score}")
    return final_score
136139
137140
138141class OVSelfCheckLLMPrompt (SelfCheckLLMPrompt ):
# NOTE(review): `parser` (argparse.ArgumentParser) and the --chat_model argument
# are created above this span — confirm against the full file.
parser.add_argument("--personality", type=str, default="healthcare_personality.yaml", help="Path to the YAML file with chatbot personality")
parser.add_argument("--hf_token", type=str, help="HuggingFace access token to get Llama3")
parser.add_argument("--check_type", type=str, choices=["deepeval", "selfcheckgpt"], default="deepeval", help="Hallucination check type")
parser.add_argument("--selection_num", type=int, default=5, help="Maximum number of prompt are selected to compute hallucination score")

args = parser.parse_args()
# Dispatch to the selected hallucination checker; both share the same signature.
if args.check_type == "deepeval":
    run_test_deepeval(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
else:
    run_test_selfcheckgpt(args.chat_model, Path(args.personality), args.hf_token, args.selection_num)
0 commit comments