@@ -71,7 +71,6 @@ def clean_score(score):
                     return result
                 except Exception as e:
                     print(f"Error in judge: {str(e)}")
-                    # Return default scores
                     return type('Result', (), {
                         'accuracy': '0',
                         'consistency': '0',
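The except branch above falls back to a throwaway result object built with type(), so callers can keep reading .accuracy and .consistency after a judge failure instead of special-casing the error. A minimal sketch of that pattern; the wrapper and judge names are hypothetical, and only the two fields visible in this hunk are mirrored (the real fallback dict continues beyond what the diff shows):

```python
def judge_with_fallback(run_judge, *args, **kwargs):
    """Call the judge; on any exception, return a zeroed stand-in result."""
    try:
        return run_judge(*args, **kwargs)
    except Exception as e:
        print(f"Error in judge: {str(e)}")
        # type() builds an ad-hoc class whose class attributes mimic the real
        # result's fields; the actual code adds more fields than shown here.
        return type('Result', (), {
            'accuracy': '0',
            'consistency': '0',
        })


def broken_judge():
    raise RuntimeError("boom")


result = judge_with_fallback(broken_judge)
print(result.accuracy, result.consistency)  # prints: 0 0
```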
@@ -119,12 +118,10 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
                 expected_output=expected
             )

-            # Calculate scores
             accuracy_score = float(judgment.accuracy) / 100
             consistency_score = float(judgment.consistency) / 100
             is_equivalent = judgment.equivalence.lower() == "yes"

-            # Store individual scores
             case_scores = {
                 "input": input_text,
                 "expected": expected,
@@ -137,7 +134,6 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
             }
             individual_scores.append(case_scores)

-            # Update totals
             total_accuracy += accuracy_score
             total_consistency += consistency_score
             total_similarity += float(is_equivalent)
@@ -149,15 +145,13 @@ def _calculate_metrics(self, source_prompt: str, target_prompt: str, test_cases:
             print(f"Judge's reasoning: {judgment.reasoning}")
             print(f"Scores - Accuracy: {accuracy_score:.2f}, Consistency: {consistency_score:.2f}, Equivalent: {is_equivalent}")

-        # Calculate final metrics
         metrics = EvaluationMetrics(
             accuracy=total_accuracy / num_cases,
             similarity=total_similarity / num_cases,
             consistency=total_consistency / num_cases,
             individual_scores=individual_scores
         )

-        # Save results to JSON
         results = {
             "source_prompt": source_prompt,
             "target_prompt": target_prompt,
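For readers skimming the diff: the scoring the removed comments used to label normalises the judge's 0-100 string scores to 0-1, treats equivalence as a boolean, and averages everything over the test cases. A self-contained sketch of that flow; the helper names and the SimpleNamespace stand-in for a judge result are illustrative, not the repo's API:

```python
from types import SimpleNamespace


def score_case(judgment) -> dict:
    """Normalise one judge result: 0-100 scores to 0-1, equivalence to a bool."""
    return {
        "accuracy": float(judgment.accuracy) / 100,
        "consistency": float(judgment.consistency) / 100,
        "equivalent": judgment.equivalence.lower() == "yes",
    }


def aggregate(case_results: list) -> dict:
    """Average per-case scores, mirroring the running totals kept in the loop."""
    n = len(case_results)
    return {
        "accuracy": sum(c["accuracy"] for c in case_results) / n,
        "consistency": sum(c["consistency"] for c in case_results) / n,
        # similarity averages a boolean, i.e. the fraction of equivalent cases
        "similarity": sum(float(c["equivalent"]) for c in case_results) / n,
    }


# Example with two judged cases
cases = [SimpleNamespace(accuracy="100", consistency="50", equivalence="Yes"),
         SimpleNamespace(accuracy="50", consistency="100", equivalence="no")]
print(aggregate([score_case(c) for c in cases]))
# -> {'accuracy': 0.75, 'consistency': 0.75, 'similarity': 0.5}
```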
@@ -183,14 +177,12 @@ def evaluate(self,

     def _save_results(self, results: dict, filename: str = 'results.json') -> None:
         """Save results to a JSON file with a new name if the file already exists."""
-        # Check if file exists
+
         if os.path.exists(filename):
-            # Create new filename with timestamp
             timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
             base, ext = os.path.splitext(filename)
             filename = f"{base}_{timestamp}{ext}"

-        # Save results
         with open(filename, 'w') as f:
             json.dump(results, f, indent=2)
         print(f"Results saved to {filename}")
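As a standalone reference, the collision handling in _save_results works like the sketch below: if the target file already exists, a timestamp is appended to the stem so an earlier run is never overwritten. The module-level wrapper and the example payloads are illustrative only:

```python
import json
import os
from datetime import datetime


def save_results(results: dict, filename: str = 'results.json') -> None:
    """Write results to JSON, renaming with a timestamp if the file exists."""
    if os.path.exists(filename):
        # e.g. results.json -> results_20240101_120000.json
        timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
        base, ext = os.path.splitext(filename)
        filename = f"{base}_{timestamp}{ext}"

    with open(filename, 'w') as f:
        json.dump(results, f, indent=2)
    print(f"Results saved to {filename}")


# The first call writes results.json; the second finds it present and writes
# a timestamped copy instead of clobbering the first file.
save_results({"accuracy": 0.75, "consistency": 0.75})
save_results({"accuracy": 0.9, "consistency": 0.8})
```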