@@ -134,7 +134,6 @@ def run_test_deepeval(chat_model_name: str, personality_file_path: Path, auth_to
134134 outputs .append (output )
135135
136136 final_score = compute_deepeval_hallucination (inputs [:selection_num ], outputs [:selection_num ], contexts_res [:selection_num ])
137- print (f"final_score is { final_score } " )
138137 return final_score
139138
140139
@@ -203,7 +202,6 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
203202 for response_list_per_prompt in tqdm (response_list , desc = "predict hallucination ratio" ):
204203 score_list .append (check_eng .predict (response_list_per_prompt ))
205204 final_score = float (np .mean (score_list ))
206- print (f"final_score is { final_score } " )
207205 return final_score
208206
209207
@@ -213,13 +211,14 @@ def run_test_selfcheckgpt(chat_model_name: str, personality_file_path: Path, aut
213211
214212 parser = argparse .ArgumentParser ()
215213 parser .add_argument ("--chat_model" , type = str , default = "deepseek-ai/DeepSeek-R1-Distill-Qwen-7B" , help = "Path/name of the chat model" )
216- parser .add_argument ("--personality" , type = str , default = "healthcare_personality.yaml" , help = "Path to the YAML file with chatbot personality" )
214+ parser .add_argument ("--personality" , type = str , default = "../ healthcare_personality.yaml" , help = "Path to the YAML file with chatbot personality" )
217215 parser .add_argument ("--hf_token" , type = str , help = "HuggingFace access token to get Llama3" )
218216 parser .add_argument ("--check_type" , type = str , choices = ["deepeval" , "selfcheckgpt" ], default = "deepeval" , help = "Hallucination check type" )
219217 parser .add_argument ("--selection_num" , type = int , default = 5 , help = "Maximum number of prompt are selected to compute hallucination score" )
220218
221219 args = parser .parse_args ()
222220 if args .check_type == "deepeval" :
223- run_test_deepeval (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
221+ hallucination_score = run_test_deepeval (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
224222 else :
225- run_test_selfcheckgpt (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
223+ hallucination_score = run_test_selfcheckgpt (args .chat_model , Path (args .personality ), args .hf_token , args .selection_num )
224+ print (f"hallucination_score for personality { args .personality } : { hallucination_score } " )
0 commit comments