@@ -39,15 +39,17 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None:
3939 "batchSize" : 10 ,
4040 "timeoutMinutes" : 20 ,
4141 "modelSettings" : [],
42- "createdAt" : datetime .now (timezone .utc ).isoformat ().replace (' +00:00' , 'Z' ),
43- "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace (' +00:00' , 'Z' ),
44- "evaluations" : []
42+ "createdAt" : datetime .now (timezone .utc ).isoformat ().replace (" +00:00" , "Z" ),
43+ "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace (" +00:00" , "Z" ),
44+ "evaluations" : [],
4545 }
4646
4747 # Ask if they want to add evaluations
4848 add_evals = self ._get_input ("Add evaluations now? (y/n): " ).lower ()
49- if add_evals in ['y' , 'yes' ]:
50- eval_set ["evaluations" ] = self ._add_evaluations_interactive (str (eval_set ["id" ]))
49+ if add_evals in ["y" , "yes" ]:
50+ eval_set ["evaluations" ] = self ._add_evaluations_interactive (
51+ str (eval_set ["id" ])
52+ )
5153
5254 # Ensure evaluationSets directory exists
5355 eval_sets_dir = self .project_root / "evaluationSets"
@@ -56,7 +58,7 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None:
5658 # Save file
5759 file_path = eval_sets_dir / filename
5860
59- with open (file_path , 'w' ) as f :
61+ with open (file_path , "w" ) as f :
6062 json .dump (eval_set , f , indent = 2 )
6163
6264 console .success (f"✅ Created eval set: { filename } " )
@@ -87,12 +89,16 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
8789
8890 evaluator_refs = []
8991 if self .evaluators :
90- refs_input = input ("➤ Select evaluators (comma-separated numbers, or 'all'): " ).strip ()
91- if refs_input .lower () == 'all' :
92- evaluator_refs = [self ._get_evaluator_id (path ) for eval_name , path in self .evaluators ]
92+ refs_input = input (
93+ "➤ Select evaluators (comma-separated numbers, or 'all'): "
94+ ).strip ()
95+ if refs_input .lower () == "all" :
96+ evaluator_refs = [
97+ self ._get_evaluator_id (path ) for eval_name , path in self .evaluators
98+ ]
9399 elif refs_input :
94100 try :
95- for num in refs_input .split (',' ):
101+ for num in refs_input .split ("," ):
96102 idx = int (num .strip ()) - 1
97103 if 0 <= idx < len (self .evaluators ):
98104 eval_path = self .evaluators [idx ][1 ]
@@ -109,7 +115,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
109115 while True :
110116 console .info (f"\n Test Case #{ test_count } " )
111117 test_name = input ("➤ Test Name (or 'done' to finish): " ).strip ()
112- if test_name .lower () == ' done' :
118+ if test_name .lower () == " done" :
113119 break
114120
115121 if not test_name :
@@ -118,7 +124,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
118124
119125 # Inputs
120126 console .info ("📥 Inputs (JSON format)" )
121- console .info (" Examples: {\" a \ " : 5, \" b \ " : 3} or {\ " query\ " : \ " hello world\" }" )
127+ console .info (' Examples: {"a ": 5, "b ": 3} or {"query": "hello world"}' )
122128 inputs_str = input ("➤ Inputs: " ).strip ()
123129 try :
124130 inputs = json .loads (inputs_str ) if inputs_str else {}
@@ -147,8 +153,12 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
147153 "simulateTools" : False ,
148154 "toolsToSimulate" : [],
149155 "evalSetId" : f"eval-{ len (self .eval_sets ) + 1 } " ,
150- "createdAt" : datetime .now (timezone .utc ).isoformat ().replace ('+00:00' , 'Z' ),
151- "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace ('+00:00' , 'Z' )
156+ "createdAt" : datetime .now (timezone .utc )
157+ .isoformat ()
158+ .replace ("+00:00" , "Z" ),
159+ "updatedAt" : datetime .now (timezone .utc )
160+ .isoformat ()
161+ .replace ("+00:00" , "Z" ),
152162 }
153163 evaluations .append (evaluation )
154164 test_count += 1
@@ -167,9 +177,9 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
167177 "batchSize" : 10 ,
168178 "timeoutMinutes" : 20 ,
169179 "modelSettings" : [],
170- "createdAt" : datetime .now (timezone .utc ).isoformat ().replace (' +00:00' , 'Z' ),
171- "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace (' +00:00' , 'Z' ),
172- "evaluations" : evaluations
180+ "createdAt" : datetime .now (timezone .utc ).isoformat ().replace (" +00:00" , "Z" ),
181+ "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace (" +00:00" , "Z" ),
182+ "evaluations" : evaluations ,
173183 }
174184
175185 # Ensure evaluationSets directory exists
@@ -180,7 +190,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
180190 file_path = eval_sets_dir / filename
181191
182192 try :
183- with open (file_path , 'w' ) as f :
193+ with open (file_path , "w" ) as f :
184194 json .dump (eval_set , f , indent = 2 )
185195
186196 console .success (f"\n ✅ Created eval set: { filename } " )
@@ -193,15 +203,17 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None:
193203
194204 input ("\n Press Enter to continue..." )
195205
196- def _add_evaluations_interactive (self : "InteractiveEvalCLI" , eval_set_id : str ) -> List [Dict [str , Any ]]:
206+ def _add_evaluations_interactive (
207+ self : "InteractiveEvalCLI" , eval_set_id : str
208+ ) -> List [Dict [str , Any ]]:
197209 """Add evaluations interactively."""
198210 evaluations = []
199211 test_count = 1
200212
201213 while True :
202214 console .info (f"\n Test Case #{ test_count } " )
203215 test_name = self ._get_input ("Test Name (or 'done' to finish): " )
204- if test_name .lower () == ' done' :
216+ if test_name .lower () == " done" :
205217 break
206218
207219 if not test_name :
@@ -210,7 +222,7 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -
210222
211223 # Inputs
212224 console .info ("📥 Inputs (JSON format)" )
213- console .info (" Examples: {\" a \ " : 5, \" b \ " : 3} or {\ " query\ " : \ " hello world\" }" )
225+ console .info (' Examples: {"a ": 5, "b ": 3} or {"query": "hello world"}' )
214226 inputs_str = input ("➤ Inputs: " ).strip ()
215227 try :
216228 inputs = json .loads (inputs_str ) if inputs_str else {}
@@ -239,8 +251,12 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -
239251 "simulateTools" : False ,
240252 "toolsToSimulate" : [],
241253 "evalSetId" : eval_set_id ,
242- "createdAt" : datetime .now (timezone .utc ).isoformat ().replace ('+00:00' , 'Z' ),
243- "updatedAt" : datetime .now (timezone .utc ).isoformat ().replace ('+00:00' , 'Z' )
254+ "createdAt" : datetime .now (timezone .utc )
255+ .isoformat ()
256+ .replace ("+00:00" , "Z" ),
257+ "updatedAt" : datetime .now (timezone .utc )
258+ .isoformat ()
259+ .replace ("+00:00" , "Z" ),
244260 }
245261 evaluations .append (evaluation )
246262 test_count += 1
@@ -278,7 +294,9 @@ def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None:
278294 except Exception :
279295 console .info (f" 📄 { path .name } (error loading)" )
280296
281- def _show_eval_set_details (self : "InteractiveEvalCLI" , eval_set_tuple : tuple [str , Path ]) -> None :
297+ def _show_eval_set_details (
298+ self : "InteractiveEvalCLI" , eval_set_tuple : tuple [str , Path ]
299+ ) -> None :
282300 """Show detailed eval set view."""
283301 name , path = eval_set_tuple
284302 self ._clear_screen ()
@@ -296,26 +314,26 @@ def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str
296314 console .info (f"📦 Batch Size: { data .get ('batchSize' , 'Unknown' )} " )
297315 console .info (f"⏱️ Timeout: { data .get ('timeoutMinutes' , 'Unknown' )} minutes" )
298316
299- evaluator_refs = data .get (' evaluatorRefs' , [])
317+ evaluator_refs = data .get (" evaluatorRefs" , [])
300318 if evaluator_refs :
301319 console .info ("\n 🎯 Evaluator References:" )
302320 for ref in evaluator_refs :
303321 console .info (f" • { ref } " )
304322
305- evaluations = data .get (' evaluations' , [])
323+ evaluations = data .get (" evaluations" , [])
306324 if evaluations :
307325 console .info ("\n 📝 Test Cases:" )
308326 for i , eval_data in enumerate (evaluations [:10 ], 1 ): # Show first 10
309- test_name = eval_data .get (' name' , f' Test { i } ' )
327+ test_name = eval_data .get (" name" , f" Test { i } " )
310328 console .info (f" { i } . { test_name } " )
311- if ' inputs' in eval_data :
312- inputs_preview = str (eval_data [' inputs' ])[:60 ]
313- if len (str (eval_data [' inputs' ])) > 60 :
329+ if " inputs" in eval_data :
330+ inputs_preview = str (eval_data [" inputs" ])[:60 ]
331+ if len (str (eval_data [" inputs" ])) > 60 :
314332 inputs_preview += "..."
315333 console .info (f" Input: { inputs_preview } " )
316- if ' expectedOutput' in eval_data :
317- output_preview = str (eval_data [' expectedOutput' ])[:60 ]
318- if len (str (eval_data [' expectedOutput' ])) > 60 :
334+ if " expectedOutput" in eval_data :
335+ output_preview = str (eval_data [" expectedOutput" ])[:60 ]
336+ if len (str (eval_data [" expectedOutput" ])) > 60 :
319337 output_preview += "..."
320338 console .info (f" Expected: { output_preview } " )
321339
0 commit comments