From e7dff17b2c1bb8c50ec36e4e07162553894304c3 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 17:55:18 -0700 Subject: [PATCH 1/6] fix: adding interactive mode for eval --- .../comprehensive_calculator_tests.json | 118 ++ .../calculator/evaluators/exact_match.json | 10 + .../evaluators/json_similarity.json | 10 + src/uipath/_cli/_eval_interactive.py | 1199 +++++++++++++++++ src/uipath/_cli/_utils/_eval_set.py | 15 +- src/uipath/_cli/cli_eval.py | 77 ++ 6 files changed, 1424 insertions(+), 5 deletions(-) create mode 100644 samples/calculator/evaluationSets/comprehensive_calculator_tests.json create mode 100644 samples/calculator/evaluators/exact_match.json create mode 100644 samples/calculator/evaluators/json_similarity.json create mode 100644 src/uipath/_cli/_eval_interactive.py diff --git a/samples/calculator/evaluationSets/comprehensive_calculator_tests.json b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json new file mode 100644 index 000000000..f8c941cb2 --- /dev/null +++ b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json @@ -0,0 +1,118 @@ +{ + "id": "calc-comprehensive-001", + "fileName": "comprehensive_eval_set.json", + "evaluatorRefs": ["exact-match-eval", "json-similarity-eval"], + "name": "Comprehensive Calculator Tests", + "batchSize": 10, + "timeoutMinutes": 10, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": [ + { + "id": "add-basic", + "name": "Basic Addition", + "inputs": { + "a": 5, + "b": 3, + "operator": "+" + }, + "expectedOutput": { + "result": 8.0 + }, + "expectedAgentBehavior": "Add two positive numbers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "sub-basic", + "name": "Basic Subtraction", + "inputs": { + "a": 10, + "b": 4, + "operator": "-" + }, + "expectedOutput": { + "result": 6.0 + }, + "expectedAgentBehavior": "Subtract smaller from larger", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "mul-basic", + "name": "Basic Multiplication", + "inputs": { + "a": 7, + "b": 6, + "operator": "*" + }, + "expectedOutput": { + "result": 42.0 + }, + "expectedAgentBehavior": "Multiply two integers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-basic", + "name": "Basic Division", + "inputs": { + "a": 15, + "b": 3, + "operator": "/" + }, + "expectedOutput": { + "result": 5.0 + }, + "expectedAgentBehavior": "Divide evenly", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-zero", + "name": "Division by Zero", + "inputs": { + "a": 10, + "b": 0, + "operator": "/" + }, + "expectedOutput": { + "result": 0.0 + }, + 
"expectedAgentBehavior": "Handle division by zero", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + ] +} \ No newline at end of file diff --git a/samples/calculator/evaluators/exact_match.json b/samples/calculator/evaluators/exact_match.json new file mode 100644 index 000000000..4750fc819 --- /dev/null +++ b/samples/calculator/evaluators/exact_match.json @@ -0,0 +1,10 @@ +{ + "id": "exact-match-eval", + "name": "Exact Match Evaluator", + "description": "Tests for exact output matches", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/samples/calculator/evaluators/json_similarity.json b/samples/calculator/evaluators/json_similarity.json new file mode 100644 index 000000000..b1fac450e --- /dev/null +++ b/samples/calculator/evaluators/json_similarity.json @@ -0,0 +1,10 @@ +{ + "id": "json-similarity-eval", + "name": "JSON Similarity Evaluator", + "description": "Tests for structural JSON similarity with tolerance", + "category": 0, + "type": 6, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py new file mode 100644 index 000000000..bb2872ab5 --- /dev/null +++ b/src/uipath/_cli/_eval_interactive.py @@ -0,0 +1,1199 @@ +"""Simple interactive CLI for evaluations - keyboard only, no mouse.""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import List, Optional, Tuple + +import select +import sys +import termios +import tty + +def has_termios() -> bool: + """Check if we have termios support for advanced input.""" + try: + termios.tcgetattr(sys.stdin) + return True + except: + return False + +HAS_NAVIGATION = has_termios() + +from ._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +class InteractiveEvalCLI: + """Simple, fast, keyboard-driven evaluation CLI.""" + + def __init__(self, project_root: Path = None): + self.project_root = project_root or Path.cwd() + self.eval_sets: List[Tuple[str, Path]] = [] + self.evaluators: List[Tuple[str, Path]] = [] + self.current_selection = 0 + self.menu_items = [ + "šŸ“‹ List eval sets", + "āš™ļø List evaluators", + "⚔ Quick run (auto-select)", + "āž• Create eval set", + "āž• Create evaluator", + "šŸŽÆ Run specific combination" + ] + self._discover_files() + + def _show_ascii_art(self): + """Display ASCII art banner.""" + art = """ + ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ + ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• + + Evaluation Builder + Interactive Evaluation Toolkit + """ + console.info(art) + + def _discover_files(self) -> None: + """Quickly discover eval sets and evaluators.""" 
+ # Clear existing lists to avoid duplicates + self.eval_sets.clear() + self.evaluators.clear() + + # Find eval sets from evaluationSets folder + eval_sets_dir = self.project_root / "evaluationSets" + if eval_sets_dir.exists(): + for eval_file in eval_sets_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Check if it's an eval set by presence of "evaluations" array + if "evaluations" in data and isinstance(data.get("evaluations"), list): + name = data.get("name", eval_file.stem) + self.eval_sets.append((name, eval_file)) + except: + pass + + # Find evaluators from evaluators folder + evaluators_dir = self.project_root / "evaluators" + if evaluators_dir.exists(): + for eval_file in evaluators_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Verify it has evaluator-specific fields + if "id" in data and "type" in data: + name = data.get("name", eval_file.stem) + self.evaluators.append((name, eval_file)) + except: + pass + + def run(self) -> None: + """Run the interactive CLI.""" + self._show_ascii_art() + + if HAS_NAVIGATION: + self._run_with_navigation() + else: + self._run_basic() + + def _run_with_navigation(self) -> None: + """Run with arrow key navigation.""" + while True: + try: + self._clear_screen() + self._show_status() + self._show_navigable_menu() + + # Get key input + key = self._get_key_input() + + if key in ['q', 'Q']: + console.info("šŸ‘‹ Goodbye!") + break + elif key == 'up': + self.current_selection = (self.current_selection - 1) % len(self.menu_items) + elif key == 'down': + self.current_selection = (self.current_selection + 1) % len(self.menu_items) + elif key in ['enter', ' ']: + self._execute_menu_item_with_navigation(self.current_selection) + elif key.isdigit() and 1 <= int(key) <= len(self.menu_items): + self.current_selection = int(key) - 1 + self._execute_menu_item_with_navigation(self.current_selection) + + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + except Exception as e: + console.error(f"Error: {e}") + self._get_input("\nPress Enter to continue...") + + def _run_basic(self) -> None: + """Run basic mode without arrow keys.""" + while True: + try: + self._show_status() + self._show_main_menu() + choice = self._get_input("\nChoice (1-6, q to quit): ").strip().lower() + + if choice == 'q': + console.info("šŸ‘‹ Goodbye!") + break + elif choice.isdigit() and 1 <= int(choice) <= len(self.menu_items): + self._execute_menu_item(int(choice) - 1) + else: + console.warning("Invalid choice. Try again.") + + if choice in ['1', '2']: + self._get_input("\nPress Enter to continue...") + + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + except Exception as e: + console.error(f"Error: {e}") + + def _clear_screen(self) -> None: + """Clear the screen.""" + import os + os.system('cls' if os.name == 'nt' else 'clear') + self._show_ascii_art() + + def _show_status(self) -> None: + """Show project status.""" + console.info(f"šŸ“ Project: {self.project_root.name}") + console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") + console.info("─" * 65) + + def _show_navigable_menu(self) -> None: + """Show menu with current selection highlighted.""" + console.info("\nāŒØļø Navigation: ↑↓ to navigate, Enter/Space to select, 1-6 for direct, q to quit, Backspace to go back") + console.info("─" * 65) + + for i, item in enumerate(self.menu_items): + if i == self.current_selection: + console.info(f"ā–ŗ {i+1}. 
{item} ā—„") + else: + console.info(f" {i+1}. {item}") + + def _get_key_input(self) -> str: + """Get key input with arrow key support.""" + if not HAS_NAVIGATION: + return input("āž¤ ").strip().lower() + + try: + # Set terminal to raw mode + old_settings = termios.tcgetattr(sys.stdin) + tty.setraw(sys.stdin) + + char = sys.stdin.read(1) + + # Handle escape sequences (arrow keys) + if char == '\x1b': # ESC + char += sys.stdin.read(2) + if char == '\x1b[A': # Up arrow + return 'up' + elif char == '\x1b[B': # Down arrow + return 'down' + elif char == '\x1b[C': # Right arrow + return 'enter' + elif char == '\x1b[D': # Left arrow + return 'up' + elif char == '\r' or char == '\n': # Enter + return 'enter' + elif char == ' ': # Space + return 'enter' + elif char in ['q', 'Q']: + return 'q' + elif char == '\x7f': # Backspace (DEL) + return 'back' + elif char == '\x08': # Backspace (BS) + return 'back' + elif char.isdigit() and 1 <= int(char) <= 6: + return char + elif char == '\x03': # Ctrl+C + raise KeyboardInterrupt + + return '' + except: + return input("āž¤ ").strip().lower() + finally: + # Restore terminal settings + try: + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) + except: + pass + + def _execute_menu_item_with_navigation(self, index: int) -> None: + """Execute menu item with navigation support.""" + if index == 0: + self._drill_down_eval_sets() + elif index == 1: + self._drill_down_evaluators() + elif index == 2: + self._quick_run_no_clear() + elif index == 3: + self._create_eval_set_interactive() + elif index == 4: + self._create_evaluator_interactive() + elif index == 5: + self._run_specific_navigation() + + def _execute_menu_item(self, index: int) -> None: + """Execute selected menu item (basic mode).""" + if index == 0: + self._list_eval_sets() + elif index == 1: + self._list_evaluators() + elif index == 2: + self._quick_run() + elif index == 3: + self._create_eval_set() + elif index == 4: + self._create_evaluator() + elif index == 5: + self._run_specific() + + if index in [0, 1]: + self._get_input("\nPress Enter to continue...") + + def _show_main_menu(self) -> None: + """Show main menu options.""" + console.info(f"\nšŸ“ Project: {self.project_root.name}") + console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") + console.info("\n" + "─" * 50) + console.info("1. šŸ“‹ List eval sets") + console.info("2. āš™ļø List evaluators") + console.info("3. ⚔ Quick run (auto-select)") + console.info("4. āž• Create eval set") + console.info("5. āž• Create evaluator") + console.info("6. šŸŽÆ Run specific combination") + + def _list_eval_sets(self) -> None: + """List available evaluation sets.""" + console.info("\nšŸ“‹ Available Eval Sets:") + if not self.eval_sets: + console.warning("No eval sets found") + return + + for i, (name, path) in enumerate(self.eval_sets, 1): + # Load test count + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f"{i}. {name}") + console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") + console.info(f" File: {path.name}") + except: + console.info(f"{i}. 
{name} (error loading)") + + def _list_evaluators(self) -> None: + """List available evaluators.""" + console.info("\nāš™ļø Available Evaluators:") + if not self.evaluators: + console.warning("No evaluators found") + return + + for i, (name, path) in enumerate(self.evaluators, 1): + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f"{i}. {name}") + console.info(f" Type: {category} | {type_name}") + console.info(f" File: {path.name}") + except: + console.info(f"{i}. {name} (error loading)") + + def _list_eval_sets_navigation(self) -> None: + """List eval sets with navigation.""" + self._clear_screen() + console.info("šŸ“‹ Available Eval Sets") + console.info("─" * 65) + self._list_eval_sets() + console.info("\nāŒØļø Press any key to go back...") + self._get_key_input() + + def _list_evaluators_navigation(self) -> None: + """List evaluators with navigation.""" + self._clear_screen() + console.info("āš™ļø Available Evaluators") + console.info("─" * 65) + self._list_evaluators() + console.info("\nāŒØļø Press any key to go back...") + self._get_key_input() + + def _quick_run(self) -> None: + """Quick run with auto-selection.""" + if not self.eval_sets: + console.error("No eval sets found!") + return + + if not self.evaluators: + console.error("No evaluators found!") + return + + console.info("\n⚔ Quick Run:") + + # Auto-select first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"šŸ“‹ Using eval set: {eval_name}") + + # Auto-select all evaluators + console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _quick_run_no_clear(self) -> None: + """Quick run without clearing screen.""" + if not self.eval_sets: + console.error("No eval sets found!") + input("\nPress Enter to continue...") + return + + if not self.evaluators: + console.error("No evaluators found!") + input("\nPress Enter to continue...") + return + + console.info("\n⚔ Quick Run:") + + # Auto-select first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"šŸ“‹ Using eval set: {eval_name}") + + # Auto-select all evaluators + console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + + def _run_specific(self) -> None: + """Run with specific selection.""" + if not self.eval_sets or not self.evaluators: + console.error("Need both eval sets and evaluators!") + return + + # Select eval set with navigation + eval_choice = self._select_from_list(self.eval_sets, "Eval Set") + if eval_choice is None: + return + + eval_name, eval_path = self.eval_sets[eval_choice - 1] + console.success(f"Selected: {eval_name}") + + # Confirm and run + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _run_specific_navigation(self) -> None: + """Run specific combination with navigation.""" + if not self.eval_sets or not self.evaluators: + console.error("Need both eval sets and evaluators!") + input("\nPress Enter to continue...") + return + + # Select eval set + self._clear_screen() + console.info("šŸŽÆ Select Evaluation Set") + console.info("─" * 65) + self._list_eval_sets() + + choice = input("\nāž¤ Select eval set number (or q to cancel): ").strip() + if choice.lower() == 'q': + return + + try: + eval_choice = int(choice) + if 1 <= eval_choice <= 
len(self.eval_sets): + eval_name, eval_path = self.eval_sets[eval_choice - 1] + console.success(f"Selected: {eval_name}") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + except ValueError: + console.error("Invalid selection") + input("\nPress Enter to continue...") + + def _execute_evaluation(self, eval_path: Path) -> None: + """Execute evaluation with live results.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + def _execute_evaluation_no_clear(self, eval_path: Path) -> None: + """Execute evaluation without clearing screen.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + input("\nPress Enter to continue...") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + input("\nPress Enter to continue...") + + def _find_main_py(self) -> Optional[Path]: + """Find main.py file.""" + # Check current directory + main_py = self.project_root / "main.py" + if main_py.exists(): + return main_py + + # Check parent directories + for parent in self.project_root.parents: + main_py = parent / "main.py" + if main_py.exists(): + return main_py + + return None + + def _get_input(self, prompt: str) -> str: + """Get user input with prompt.""" + try: + return input(f"āž¤ {prompt}") + except KeyboardInterrupt: + raise + + def _select_from_list(self, items: List[Tuple[str, Path]], title: str) -> Optional[int]: + """Interactive list selection.""" + if not items: + console.warning(f"No {title.lower()} found") + return None + + 
console.info(f"\n{title}:") + for i, (name, _) in enumerate(items, 1): + console.info(f"{i}. {name}") + + try: + value = input(f"āž¤ {title} number: ") + num = int(value) + if 1 <= num <= len(items): + return num + else: + console.warning(f"Please enter a number between 1 and {len(items)}") + return None + except (ValueError, KeyboardInterrupt): + return None + + def _confirm(self, message: str) -> bool: + """Get yes/no confirmation.""" + response = self._get_input(f"{message} (y/n): ").lower() + return response in ['y', 'yes'] + + def _get_category_name(self, category: int) -> str: + """Get category name.""" + names = {0: "Deterministic", 1: "LLM Judge", 2: "Agent Scorer", 3: "Trajectory"} + return names.get(category, "Unknown") + + def _get_type_name(self, eval_type: int) -> str: + """Get type name.""" + names = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + return names.get(eval_type, "Unknown") + + def _drill_down_eval_sets(self) -> None: + """Drill down into eval sets with navigation.""" + if not self.eval_sets: + self._show_no_items_screen("eval sets") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("šŸ“‹ Eval Sets - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.eval_sets): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_eval_set_preview(path) + else: + console.info(f" {i+1}. {name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.eval_sets) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.eval_sets) + elif key in ['enter', ' ']: + self._show_eval_set_details(self.eval_sets[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): + current_selection = int(key) - 1 + + def _drill_down_evaluators(self) -> None: + """Drill down into evaluators with navigation.""" + if not self.evaluators: + self._show_no_items_screen("evaluators") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("āš™ļø Evaluators - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.evaluators): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_evaluator_preview(path) + else: + console.info(f" {i+1}. 
{name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.evaluators) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.evaluators) + elif key in ['enter', ' ']: + self._show_evaluator_details(self.evaluators[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): + current_selection = int(key) - 1 + + def _show_no_items_screen(self, item_type: str) -> None: + """Show no items screen.""" + self._clear_screen() + console.warning(f"No {item_type} found!") + console.info(f"Press Enter to go back...") + self._get_input("") + + def _show_eval_set_preview(self, path: Path) -> None: + """Show eval set preview info.""" + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") + except: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_evaluator_preview(self, path: Path) -> None: + """Show evaluator preview info.""" + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸŽÆ Type: {category} | {type_name}") + except: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: + """Show detailed eval set view.""" + name, path = eval_set_tuple + self._clear_screen() + console.info(f"šŸ“‹ Eval Set Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"šŸ“„ File: {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") + console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") + console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") + console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") + + evaluator_refs = data.get('evaluatorRefs', []) + if evaluator_refs: + console.info(f"\nšŸŽÆ Evaluator References:") + for ref in evaluator_refs: + console.info(f" • {ref}") + + evaluations = data.get('evaluations', []) + if evaluations: + console.info(f"\nšŸ“ Test Cases:") + for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 + test_name = eval_data.get('name', f'Test {i}') + console.info(f" {i}. {test_name}") + if 'inputs' in eval_data: + inputs_preview = str(eval_data['inputs'])[:60] + if len(str(eval_data['inputs'])) > 60: + inputs_preview += "..." + console.info(f" Input: {inputs_preview}") + if 'expectedOutput' in eval_data: + output_preview = str(eval_data['expectedOutput'])[:60] + if len(str(eval_data['expectedOutput'])) > 60: + output_preview += "..." + console.info(f" Expected: {output_preview}") + + if len(evaluations) > 10: + console.info(f" ... 
and {len(evaluations) - 10} more tests") + + except Exception as e: + console.error(f"Error loading eval set: {e}") + + console.info("\nāŒØļø Press q/Backspace to go back...") + while True: + key = self._get_key_input() + if key in ['q', 'Q', 'back']: + break + + def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: + """Show detailed evaluator view.""" + name, path = evaluator_tuple + self._clear_screen() + console.info(f"āš™ļø Evaluator Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"šŸ“„ File: {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“ Description: {data.get('description', 'No description')}") + console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") + console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") + + if 'llmConfig' in data: + llm_config = data['llmConfig'] + console.info(f"\nšŸ¤– LLM Configuration:") + console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") + if 'prompt' in llm_config: + prompt_preview = llm_config['prompt'][:100] + if len(llm_config['prompt']) > 100: + prompt_preview += "..." + console.info(f" Prompt: {prompt_preview}") + + except Exception as e: + console.error(f"Error loading evaluator: {e}") + + console.info("\nāŒØļø Press q/Backspace to go back...") + while True: + key = self._get_key_input() + if key in ['q', 'Q', 'back']: + break + + def _create_eval_set(self) -> None: + """Create new evaluation set interactively.""" + console.info("\nāž• Create New Eval Set") + + name = self._get_input("Name: ") + if not name: + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Create basic eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": [], + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": [] + } + + # Ask if they want to add evaluations + add_evals = self._get_input("Add evaluations now? (y/n): ").lower() + if add_evals in ['y', 'yes']: + eval_set["evaluations"] = self._add_evaluations_interactive(eval_set["id"]) + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"āœ… Created eval set: {filename}") + self._discover_files() # Refresh + + def _create_eval_set_interactive(self) -> None: + """Create new evaluation set with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Eval Set - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Eval Set Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Evaluator References + console.info("\nšŸŽÆ Evaluator References") + console.info("Available evaluators:") + for i, (eval_name, _) in enumerate(self.evaluators, 1): + console.info(f" {i}. 
{eval_name}") + + evaluator_refs = [] + if self.evaluators: + refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() + if refs_input.lower() == 'all': + evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + elif refs_input: + try: + for num in refs_input.split(','): + idx = int(num.strip()) - 1 + if 0 <= idx < len(self.evaluators): + eval_path = self.evaluators[idx][1] + eval_id = self._get_evaluator_id(eval_path) + evaluator_refs.append(eval_id) + except ValueError: + console.warning("Invalid input, no evaluators selected") + + # Test Cases + console.info("\nšŸ“ Test Cases") + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": f"eval-{len(self.eval_sets) + 1}", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + evaluations.append(evaluation) + test_count += 1 + + if not evaluations: + console.warning("At least one test case is required!") + input("Press Enter to continue...") + return + + # Create eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": evaluator_refs, + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": evaluations + } + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"\nāœ… Created eval set: {filename}") + console.info(f"šŸ“Š Tests: {len(evaluations)}") + console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create eval set: {e}") + + input("\nPress Enter to continue...") + + def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: + """Add evaluations interactively.""" + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = self._get_input("Test Name (or 'done' to finish): ") + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # 
Simple inputs + console.info("Inputs (JSON format, e.g., {\"a\": 5, \"b\": 3})") + inputs_str = self._get_input("Inputs: ") + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected output + console.info("Expected Output (JSON format)") + expected_str = self._get_input("Expected Output: ") + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": eval_set_id, + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + evaluations.append(evaluation) + test_count += 1 + + return evaluations + + def _create_evaluator(self) -> None: + """Create new evaluator interactively.""" + console.info("\nāž• Create New Evaluator") + + # Select template + console.info("Templates:") + console.info("1. Exact Match") + console.info("2. JSON Similarity") + + template = self._get_number_input("Template (1-2): ", 1, 2) + if template is None: + return + + name = self._get_input("Name: ") + if not name: + return + + # Template configurations + if template == 1: + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": "Exact match evaluator", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + else: # JSON Similarity + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": "JSON similarity evaluator", + "category": 0, + "type": 6, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"āœ… Created evaluator: {filename}") + self._discover_files() # Refresh + + def _create_evaluator_interactive(self) -> None: + """Create new evaluator with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Evaluator - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Evaluator Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + description = input("āž¤ Description: ").strip() or f"{name} evaluator" + + # Category Selection + console.info("\nšŸ·ļø Category Selection") + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + + for key, value in categories.items(): + console.info(f" {key}. 
{value}") + + try: + category = int(input("āž¤ Select Category (0-3): ") or "0") + if category not in categories: + category = 0 + except ValueError: + category = 0 + + # Type Selection + console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") + types = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + + # Show relevant types based on category + relevant_types = [] + if category == 0: # Deterministic + relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + elif category == 1: # LLM as Judge + relevant_types = [4, 5] # Factuality, Custom + elif category == 3: # Trajectory + relevant_types = [7] # Trajectory + else: + relevant_types = list(types.keys()) + + for type_id in relevant_types: + console.info(f" {type_id}. {types[type_id]}") + + try: + eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + if eval_type not in relevant_types: + eval_type = relevant_types[0] + except (ValueError, IndexError): + eval_type = 1 + + # Target Output Key + console.info(f"\nšŸ” Target Configuration") + console.info("Target Output Key determines which part of the output to evaluate") + console.info("Examples: '*' (all), 'result', 'answer', 'output'") + target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": description, + "category": category, + "type": eval_type, + "targetOutputKey": target_key, + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + + # LLM Configuration (if LLM as Judge) + if category == 1: # LLM as Judge + console.info(f"\nšŸ¤– LLM Configuration") + model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" + + console.info("šŸ“ Evaluation Prompt") + console.info("This prompt will be used to evaluate the agent's output") + prompt = input("āž¤ Evaluation Prompt: ").strip() + + if prompt: + evaluator["llmConfig"] = { + "modelName": model_name, + "prompt": prompt, + "temperature": 0.0, + "maxTokens": 1000 + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"\nāœ… Created evaluator: {filename}") + console.info(f"šŸ·ļø Category: {categories[category]}") + console.info(f"šŸŽÆ Type: {types[eval_type]}") + console.info(f"šŸ” Target: {target_key}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create evaluator: {e}") + + input("\nPress Enter to continue...") + + def _get_number_input(self, prompt: str, min_val: int, max_val: int) -> Optional[int]: + """Get number input with validation.""" + try: + value = input(f"āž¤ {prompt}") + num = int(value) + if min_val <= num <= max_val: + return num + else: + console.warning(f"Please enter a number between {min_val} and {max_val}") + return None + except (ValueError, KeyboardInterrupt): + return None + + def _get_evaluator_id(self, path: Path) -> str: + """Get evaluator ID from file.""" + try: + with open(path) as f: + data = json.load(f) + return data.get("id", path.stem) + except: + return path.stem + + +def launch_interactive_cli(project_root: 
Path = None) -> None: + """Launch the interactive CLI.""" + cli = InteractiveEvalCLI(project_root) + cli.run() diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 9e95d0c71..53d55e216 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -13,7 +13,7 @@ class EvalHelpers: @staticmethod def auto_discover_eval_set() -> str: - """Auto-discover evaluation set from evals/eval-sets directory. + """Auto-discover evaluation set from evaluationSets or evals/eval-sets directory. Returns: Path to the evaluation set file @@ -21,19 +21,24 @@ def auto_discover_eval_set() -> str: Raises: ValueError: If no eval set found or multiple eval sets exist """ - eval_sets_dir = Path("evals/eval-sets") + # Try evaluationSets folder first (new structure) + eval_sets_dir = Path("evaluationSets") + + # Fall back to evals/eval-sets (old structure) + if not eval_sets_dir.exists(): + eval_sets_dir = Path("evals/eval-sets") if not eval_sets_dir.exists(): raise ValueError( - "No 'evals/eval-sets' directory found. " - "Please set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." + "No 'evaluationSets' or 'evals/eval-sets' directory found. " + "Please create an evaluation set or set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." ) eval_set_files = list(eval_sets_dir.glob("*.json")) if not eval_set_files: raise ValueError( - "No evaluation set files found in 'evals/eval-sets' directory. " + f"No evaluation set files found in '{eval_sets_dir}' directory. " ) if len(eval_set_files) > 1: diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 53dd3bc12..70debd662 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -31,6 +31,55 @@ console = ConsoleLogger() +def _display_local_results(results_data): + """Display evaluation results locally in a formatted way.""" + if not results_data: + return + + evaluation_set_name = results_data.get("evaluationSetName", "Unknown") + overall_score = results_data.get("score", 0.0) + evaluation_results = results_data.get("evaluationSetResults", []) + + console.info(f"\nšŸŽÆ Evaluation Report: {evaluation_set_name}") + console.info(f"šŸ“Š Overall Score: {overall_score:.1f}%") + console.info("=" * 60) + + passed_count = 0 + total_count = len(evaluation_results) + + for i, test in enumerate(evaluation_results, 1): + test_score = test.get("score", 0.0) + test_name = test.get("evaluationName", f"Test {i}") + + if test_score == 100.0: + status = "āœ… PASS" + passed_count += 1 + elif test_score == 0.0: + status = "āŒ FAIL" + else: + status = "āš ļø PARTIAL" + passed_count += 0.5 # Partial credit + + console.info(f"\n{i}. 
{test_name}: {status} ({test_score:.1f}%)") + + evaluator_results = test.get("evaluationRunResults", []) + for evaluator_result in evaluator_results: + evaluator_name = evaluator_result.get("evaluatorName", "Unknown Evaluator") + result = evaluator_result.get("result", {}) + score = result.get("score", 0.0) + eval_time = result.get("evaluationTime", 0.0) + console.info(f" └─ {evaluator_name}: {score:.1f}% ({eval_time*1000:.2f}ms)") + + console.info(f"\nšŸŽÆ Summary: {int(passed_count)}/{total_count} tests passed") + if overall_score == 100.0: + console.success("šŸŽ‰ All tests passed!") + elif overall_score == 0.0: + console.info("šŸ’„ All tests failed!") + else: + console.info(f"⚔ Partial success: {overall_score:.1f}% overall score") + console.info("") + + class LiteralOption(click.Option): def type_cast_value(self, ctx, value): try: @@ -61,6 +110,12 @@ def type_cast_value(self, ctx, value): type=click.Path(exists=False), help="File path where the output will be written", ) +@click.option( + "--interactive", + is_flag=True, + help="Launch streamlined keyboard-only interactive CLI", + default=False, +) @track(when=lambda *_a, **_kw: os.getenv(ENV_JOB_ID) is None) def eval( entrypoint: Optional[str], @@ -69,6 +124,7 @@ def eval( no_report: bool, workers: int, output_file: Optional[str], + interactive: bool, ) -> None: """Run an evaluation set against the agent. @@ -78,7 +134,20 @@ def eval( eval_ids: Optional list of evaluation IDs workers: Number of parallel workers for running evaluations no_report: Do not report the evaluation results + interactive: Launch streamlined keyboard-only interactive CLI """ + # Handle interactive mode + if interactive: + try: + from ._eval_interactive import launch_interactive_cli + launch_interactive_cli() + return + except ImportError as e: + console.error(f"Interactive mode requires additional dependencies: {e}") + return + except Exception as e: + console.error(f"Failed to launch interactive mode: {e}") + return if not no_report and not os.getenv("UIPATH_FOLDER_KEY"): os.environ["UIPATH_FOLDER_KEY"] = asyncio.run( get_personal_workspace_key_async() @@ -131,16 +200,24 @@ def generate_runtime_context(**context_kwargs) -> UiPathRuntimeContext: if eval_context.job_id: runtime_factory.add_span_exporter(LlmOpsHttpExporter()) + eval_runtime_ref = None + async def execute(): + nonlocal eval_runtime_ref async with UiPathEvalRuntime.from_eval_context( factory=runtime_factory, context=eval_context, event_bus=event_bus, ) as eval_runtime: + eval_runtime_ref = eval_runtime await eval_runtime.execute() await event_bus.wait_for_all(timeout=10) asyncio.run(execute()) + + # Display results locally when --no-report is used + if no_report and eval_runtime_ref and eval_runtime_ref.context.result: + _display_local_results(eval_runtime_ref.context.result.output) except Exception as e: console.error( f"Error: Unexpected error occurred - {str(e)}", include_traceback=True From 03875e1f12685350f400b05ef94b895401e67107 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:05:44 -0700 Subject: [PATCH 2/6] fix: mypy type errors in interactive eval mode - Add Optional type hints for Path parameters - Add Dict and Any imports for type annotations - Add null checks for process.stdout before iteration - Add type annotations for evaluation dictionaries - Fix return type for _add_evaluations_interactive --- src/uipath/_cli/_eval_interactive.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git 
a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index bb2872ab5..c77c868e5 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ b/src/uipath/_cli/_eval_interactive.py @@ -4,7 +4,7 @@ import subprocess import sys from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import select import sys @@ -29,7 +29,7 @@ def has_termios() -> bool: class InteractiveEvalCLI: """Simple, fast, keyboard-driven evaluation CLI.""" - def __init__(self, project_root: Path = None): + def __init__(self, project_root: Optional[Path] = None): self.project_root = project_root or Path.cwd() self.eval_sets: List[Tuple[str, Path]] = [] self.evaluators: List[Tuple[str, Path]] = [] @@ -457,8 +457,9 @@ def _execute_evaluation(self, eval_path: Path) -> None: ) # Stream output in real-time - for line in process.stdout: - print(line.rstrip()) + if process.stdout: + for line in process.stdout: + print(line.rstrip()) process.wait() @@ -504,8 +505,9 @@ def _execute_evaluation_no_clear(self, eval_path: Path) -> None: ) # Stream output in real-time - for line in process.stdout: - print(line.rstrip()) + if process.stdout: + for line in process.stdout: + print(line.rstrip()) process.wait() @@ -795,7 +797,7 @@ def _create_eval_set(self) -> None: # Ask if they want to add evaluations add_evals = self._get_input("Add evaluations now? (y/n): ").lower() if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(eval_set["id"]) + eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) # Ensure evaluationSets directory exists eval_sets_dir = self.project_root / "evaluationSets" @@ -883,7 +885,7 @@ def _create_eval_set_interactive(self) -> None: console.warning("Invalid JSON, using empty expected output") expected_output = {} - evaluation = { + evaluation: Dict[str, Any] = { "id": f"test-{test_count}", "name": test_name, "inputs": inputs, @@ -941,7 +943,7 @@ def _create_eval_set_interactive(self) -> None: input("\nPress Enter to continue...") - def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: + def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]]: """Add evaluations interactively.""" evaluations = [] test_count = 1 @@ -974,7 +976,7 @@ def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: console.warning("Invalid JSON, using empty expected output") expected_output = {} - evaluation = { + evaluation: Dict[str, Any] = { "id": f"test-{test_count}", "name": test_name, "inputs": inputs, @@ -1193,7 +1195,7 @@ def _get_evaluator_id(self, path: Path) -> str: return path.stem -def launch_interactive_cli(project_root: Path = None) -> None: +def launch_interactive_cli(project_root: Optional[Path] = None) -> None: """Launch the interactive CLI.""" cli = InteractiveEvalCLI(project_root) cli.run() From f7dbc09112780b779a2e8ccce4510db53e710e2a Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:10:34 -0700 Subject: [PATCH 3/6] fix: ruff linting errors in interactive eval mode - Move imports to top of file - Replace all bare except clauses with Exception - Remove unused f-string prefixes - Fix import organization --- src/uipath/_cli/_eval_interactive.py | 44 +++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index c77c868e5..02c670c39 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ 
b/src/uipath/_cli/_eval_interactive.py @@ -3,26 +3,24 @@ import json import subprocess import sys +import termios +import tty from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -import select -import sys -import termios -import tty +from ._utils._console import ConsoleLogger + def has_termios() -> bool: """Check if we have termios support for advanced input.""" try: termios.tcgetattr(sys.stdin) return True - except: + except Exception: return False -HAS_NAVIGATION = has_termios() - -from ._utils._console import ConsoleLogger +HAS_NAVIGATION = has_termios() console = ConsoleLogger() @@ -76,7 +74,7 @@ def _discover_files(self) -> None: if "evaluations" in data and isinstance(data.get("evaluations"), list): name = data.get("name", eval_file.stem) self.eval_sets.append((name, eval_file)) - except: + except Exception: pass # Find evaluators from evaluators folder @@ -90,7 +88,7 @@ def _discover_files(self) -> None: if "id" in data and "type" in data: name = data.get("name", eval_file.stem) self.evaluators.append((name, eval_file)) - except: + except Exception: pass def run(self) -> None: @@ -220,13 +218,13 @@ def _get_key_input(self) -> str: raise KeyboardInterrupt return '' - except: + except Exception: return input("āž¤ ").strip().lower() finally: # Restore terminal settings try: termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - except: + except Exception: pass def _execute_menu_item_with_navigation(self, index: int) -> None: @@ -291,7 +289,7 @@ def _list_eval_sets(self) -> None: console.info(f"{i}. {name}") console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") console.info(f" File: {path.name}") - except: + except Exception: console.info(f"{i}. {name} (error loading)") def _list_evaluators(self) -> None: @@ -310,7 +308,7 @@ def _list_evaluators(self) -> None: console.info(f"{i}. {name}") console.info(f" Type: {category} | {type_name}") console.info(f" File: {path.name}") - except: + except Exception: console.info(f"{i}. 
{name} (error loading)") def _list_eval_sets_navigation(self) -> None: @@ -652,7 +650,7 @@ def _show_no_items_screen(self, item_type: str) -> None: """Show no items screen.""" self._clear_screen() console.warning(f"No {item_type} found!") - console.info(f"Press Enter to go back...") + console.info("Press Enter to go back...") self._get_input("") def _show_eval_set_preview(self, path: Path) -> None: @@ -664,7 +662,7 @@ def _show_eval_set_preview(self, path: Path) -> None: evaluator_count = len(data.get("evaluatorRefs", [])) console.info(f" šŸ“„ {path.name}") console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") - except: + except Exception: console.info(f" šŸ“„ {path.name} (error loading)") def _show_evaluator_preview(self, path: Path) -> None: @@ -676,7 +674,7 @@ def _show_evaluator_preview(self, path: Path) -> None: type_name = self._get_type_name(data.get("type", 1)) console.info(f" šŸ“„ {path.name}") console.info(f" šŸŽÆ Type: {category} | {type_name}") - except: + except Exception: console.info(f" šŸ“„ {path.name} (error loading)") def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: @@ -699,13 +697,13 @@ def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: evaluator_refs = data.get('evaluatorRefs', []) if evaluator_refs: - console.info(f"\nšŸŽÆ Evaluator References:") + console.info("\nšŸŽÆ Evaluator References:") for ref in evaluator_refs: console.info(f" • {ref}") evaluations = data.get('evaluations', []) if evaluations: - console.info(f"\nšŸ“ Test Cases:") + console.info("\nšŸ“ Test Cases:") for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 test_name = eval_data.get('name', f'Test {i}') console.info(f" {i}. {test_name}") @@ -752,7 +750,7 @@ def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: if 'llmConfig' in data: llm_config = data['llmConfig'] - console.info(f"\nšŸ¤– LLM Configuration:") + console.info("\nšŸ¤– LLM Configuration:") console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") if 'prompt' in llm_config: prompt_preview = llm_config['prompt'][:100] @@ -1115,7 +1113,7 @@ def _create_evaluator_interactive(self) -> None: eval_type = 1 # Target Output Key - console.info(f"\nšŸ” Target Configuration") + console.info("\nšŸ” Target Configuration") console.info("Target Output Key determines which part of the output to evaluate") console.info("Examples: '*' (all), 'result', 'answer', 'output'") target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" @@ -1134,7 +1132,7 @@ def _create_evaluator_interactive(self) -> None: # LLM Configuration (if LLM as Judge) if category == 1: # LLM as Judge - console.info(f"\nšŸ¤– LLM Configuration") + console.info("\nšŸ¤– LLM Configuration") model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" console.info("šŸ“ Evaluation Prompt") @@ -1191,7 +1189,7 @@ def _get_evaluator_id(self, path: Path) -> str: with open(path) as f: data = json.load(f) return data.get("id", path.stem) - except: + except Exception: return path.stem From 71b9a0049bf5aa6064248271e58205c92a8f96fe Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:14:23 -0700 Subject: [PATCH 4/6] fix: use dynamic timestamps for eval sets and evaluators Replace hardcoded 2025-01-25 timestamps with datetime.now(timezone.utc) for createdAt and updatedAt fields in eval sets and evaluators. 
--- src/uipath/_cli/_eval_interactive.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index 02c670c39..a0b54d650 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ b/src/uipath/_cli/_eval_interactive.py @@ -5,6 +5,7 @@ import sys import termios import tty +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -787,8 +788,8 @@ def _create_eval_set(self) -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), "evaluations": [] } @@ -895,8 +896,8 @@ def _create_eval_set_interactive(self) -> None: "simulateTools": False, "toolsToSimulate": [], "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } evaluations.append(evaluation) test_count += 1 @@ -915,8 +916,8 @@ def _create_eval_set_interactive(self) -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), "evaluations": evaluations } @@ -986,8 +987,8 @@ def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]] "simulateTools": False, "toolsToSimulate": [], "evalSetId": eval_set_id, - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } evaluations.append(evaluation) test_count += 1 @@ -1020,8 +1021,8 @@ def _create_evaluator(self) -> None: "category": 0, "type": 1, "targetOutputKey": "*", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } else: # JSON Similarity evaluator = { @@ -1031,8 +1032,8 @@ def _create_evaluator(self) -> None: "category": 0, "type": 6, "targetOutputKey": "*", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } # Ensure evaluators directory exists From 5892351610a19c9567480eea2f70eaff1a50ffcb Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:25:52 -0700 Subject: [PATCH 5/6] refactor: split interactive eval CLI into modular files Reorganized the monolithic _eval_interactive.py into a maintainable module structure under src/uipath/_cli/_interactive/: - __init__.py: Module exports - _main.py: Main CLI class and entry point - _navigation.py: Navigation and input handling - _discovery.py: File discovery for eval sets and evaluators - _eval_sets.py: Eval set creation and management - _evaluators.py: Evaluator creation and management - 
_execution.py: Evaluation execution utilities - _drill_down.py: Drill-down navigation views Benefits: - Easier to maintain and extend individual features - Clear separation of concerns - Better code organization - Each file has a single responsibility Updated pyproject.toml to disable misc/unused-ignore mypy errors for interactive module (known limitation with mixin pattern). --- pyproject.toml | 4 + src/uipath/_cli/_eval_interactive.py | 1200 ------------------- src/uipath/_cli/_interactive/__init__.py | 5 + src/uipath/_cli/_interactive/_discovery.py | 46 + src/uipath/_cli/_interactive/_drill_down.py | 88 ++ src/uipath/_cli/_interactive/_eval_sets.py | 329 +++++ src/uipath/_cli/_interactive/_evaluators.py | 273 +++++ src/uipath/_cli/_interactive/_execution.py | 135 +++ src/uipath/_cli/_interactive/_main.py | 193 +++ src/uipath/_cli/_interactive/_navigation.py | 109 ++ src/uipath/_cli/cli_eval.py | 2 +- 11 files changed, 1183 insertions(+), 1201 deletions(-) delete mode 100644 src/uipath/_cli/_eval_interactive.py create mode 100644 src/uipath/_cli/_interactive/__init__.py create mode 100644 src/uipath/_cli/_interactive/_discovery.py create mode 100644 src/uipath/_cli/_interactive/_drill_down.py create mode 100644 src/uipath/_cli/_interactive/_eval_sets.py create mode 100644 src/uipath/_cli/_interactive/_evaluators.py create mode 100644 src/uipath/_cli/_interactive/_execution.py create mode 100644 src/uipath/_cli/_interactive/_main.py create mode 100644 src/uipath/_cli/_interactive/_navigation.py diff --git a/pyproject.toml b/pyproject.toml index bf148234f..21f8f967a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,10 @@ no_implicit_reexport = true disallow_untyped_defs = false +[[tool.mypy.overrides]] +module = "uipath._cli._interactive.*" +disable_error_code = ["misc", "unused-ignore"] + [tool.pydantic-mypy] init_forbid_extra = true init_typed = true diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py deleted file mode 100644 index a0b54d650..000000000 --- a/src/uipath/_cli/_eval_interactive.py +++ /dev/null @@ -1,1200 +0,0 @@ -"""Simple interactive CLI for evaluations - keyboard only, no mouse.""" - -import json -import subprocess -import sys -import termios -import tty -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from ._utils._console import ConsoleLogger - - -def has_termios() -> bool: - """Check if we have termios support for advanced input.""" - try: - termios.tcgetattr(sys.stdin) - return True - except Exception: - return False - - -HAS_NAVIGATION = has_termios() -console = ConsoleLogger() - - -class InteractiveEvalCLI: - """Simple, fast, keyboard-driven evaluation CLI.""" - - def __init__(self, project_root: Optional[Path] = None): - self.project_root = project_root or Path.cwd() - self.eval_sets: List[Tuple[str, Path]] = [] - self.evaluators: List[Tuple[str, Path]] = [] - self.current_selection = 0 - self.menu_items = [ - "šŸ“‹ List eval sets", - "āš™ļø List evaluators", - "⚔ Quick run (auto-select)", - "āž• Create eval set", - "āž• Create evaluator", - "šŸŽÆ Run specific combination" - ] - self._discover_files() - - def _show_ascii_art(self): - """Display ASCII art banner.""" - art = """ - ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— - ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ - ā–ˆā–ˆā•‘ 
ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ - ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ - ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ - ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• - - Evaluation Builder - Interactive Evaluation Toolkit - """ - console.info(art) - - def _discover_files(self) -> None: - """Quickly discover eval sets and evaluators.""" - # Clear existing lists to avoid duplicates - self.eval_sets.clear() - self.evaluators.clear() - - # Find eval sets from evaluationSets folder - eval_sets_dir = self.project_root / "evaluationSets" - if eval_sets_dir.exists(): - for eval_file in eval_sets_dir.glob("*.json"): - try: - with open(eval_file) as f: - data = json.load(f) - # Check if it's an eval set by presence of "evaluations" array - if "evaluations" in data and isinstance(data.get("evaluations"), list): - name = data.get("name", eval_file.stem) - self.eval_sets.append((name, eval_file)) - except Exception: - pass - - # Find evaluators from evaluators folder - evaluators_dir = self.project_root / "evaluators" - if evaluators_dir.exists(): - for eval_file in evaluators_dir.glob("*.json"): - try: - with open(eval_file) as f: - data = json.load(f) - # Verify it has evaluator-specific fields - if "id" in data and "type" in data: - name = data.get("name", eval_file.stem) - self.evaluators.append((name, eval_file)) - except Exception: - pass - - def run(self) -> None: - """Run the interactive CLI.""" - self._show_ascii_art() - - if HAS_NAVIGATION: - self._run_with_navigation() - else: - self._run_basic() - - def _run_with_navigation(self) -> None: - """Run with arrow key navigation.""" - while True: - try: - self._clear_screen() - self._show_status() - self._show_navigable_menu() - - # Get key input - key = self._get_key_input() - - if key in ['q', 'Q']: - console.info("šŸ‘‹ Goodbye!") - break - elif key == 'up': - self.current_selection = (self.current_selection - 1) % len(self.menu_items) - elif key == 'down': - self.current_selection = (self.current_selection + 1) % len(self.menu_items) - elif key in ['enter', ' ']: - self._execute_menu_item_with_navigation(self.current_selection) - elif key.isdigit() and 1 <= int(key) <= len(self.menu_items): - self.current_selection = int(key) - 1 - self._execute_menu_item_with_navigation(self.current_selection) - - except KeyboardInterrupt: - console.info("\nšŸ‘‹ Goodbye!") - break - except Exception as e: - console.error(f"Error: {e}") - self._get_input("\nPress Enter to continue...") - - def _run_basic(self) -> None: - """Run basic mode without arrow keys.""" - while True: - try: - self._show_status() - self._show_main_menu() - choice = self._get_input("\nChoice (1-6, q to quit): ").strip().lower() - - if choice == 'q': - console.info("šŸ‘‹ Goodbye!") - break - elif choice.isdigit() and 1 <= int(choice) <= len(self.menu_items): - self._execute_menu_item(int(choice) - 1) - else: - console.warning("Invalid choice. 
Try again.") - - if choice in ['1', '2']: - self._get_input("\nPress Enter to continue...") - - except KeyboardInterrupt: - console.info("\nšŸ‘‹ Goodbye!") - break - except Exception as e: - console.error(f"Error: {e}") - - def _clear_screen(self) -> None: - """Clear the screen.""" - import os - os.system('cls' if os.name == 'nt' else 'clear') - self._show_ascii_art() - - def _show_status(self) -> None: - """Show project status.""" - console.info(f"šŸ“ Project: {self.project_root.name}") - console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") - console.info("─" * 65) - - def _show_navigable_menu(self) -> None: - """Show menu with current selection highlighted.""" - console.info("\nāŒØļø Navigation: ↑↓ to navigate, Enter/Space to select, 1-6 for direct, q to quit, Backspace to go back") - console.info("─" * 65) - - for i, item in enumerate(self.menu_items): - if i == self.current_selection: - console.info(f"ā–ŗ {i+1}. {item} ā—„") - else: - console.info(f" {i+1}. {item}") - - def _get_key_input(self) -> str: - """Get key input with arrow key support.""" - if not HAS_NAVIGATION: - return input("āž¤ ").strip().lower() - - try: - # Set terminal to raw mode - old_settings = termios.tcgetattr(sys.stdin) - tty.setraw(sys.stdin) - - char = sys.stdin.read(1) - - # Handle escape sequences (arrow keys) - if char == '\x1b': # ESC - char += sys.stdin.read(2) - if char == '\x1b[A': # Up arrow - return 'up' - elif char == '\x1b[B': # Down arrow - return 'down' - elif char == '\x1b[C': # Right arrow - return 'enter' - elif char == '\x1b[D': # Left arrow - return 'up' - elif char == '\r' or char == '\n': # Enter - return 'enter' - elif char == ' ': # Space - return 'enter' - elif char in ['q', 'Q']: - return 'q' - elif char == '\x7f': # Backspace (DEL) - return 'back' - elif char == '\x08': # Backspace (BS) - return 'back' - elif char.isdigit() and 1 <= int(char) <= 6: - return char - elif char == '\x03': # Ctrl+C - raise KeyboardInterrupt - - return '' - except Exception: - return input("āž¤ ").strip().lower() - finally: - # Restore terminal settings - try: - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - except Exception: - pass - - def _execute_menu_item_with_navigation(self, index: int) -> None: - """Execute menu item with navigation support.""" - if index == 0: - self._drill_down_eval_sets() - elif index == 1: - self._drill_down_evaluators() - elif index == 2: - self._quick_run_no_clear() - elif index == 3: - self._create_eval_set_interactive() - elif index == 4: - self._create_evaluator_interactive() - elif index == 5: - self._run_specific_navigation() - - def _execute_menu_item(self, index: int) -> None: - """Execute selected menu item (basic mode).""" - if index == 0: - self._list_eval_sets() - elif index == 1: - self._list_evaluators() - elif index == 2: - self._quick_run() - elif index == 3: - self._create_eval_set() - elif index == 4: - self._create_evaluator() - elif index == 5: - self._run_specific() - - if index in [0, 1]: - self._get_input("\nPress Enter to continue...") - - def _show_main_menu(self) -> None: - """Show main menu options.""" - console.info(f"\nšŸ“ Project: {self.project_root.name}") - console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") - console.info("\n" + "─" * 50) - console.info("1. šŸ“‹ List eval sets") - console.info("2. āš™ļø List evaluators") - console.info("3. ⚔ Quick run (auto-select)") - console.info("4. āž• Create eval set") - console.info("5. 
āž• Create evaluator") - console.info("6. šŸŽÆ Run specific combination") - - def _list_eval_sets(self) -> None: - """List available evaluation sets.""" - console.info("\nšŸ“‹ Available Eval Sets:") - if not self.eval_sets: - console.warning("No eval sets found") - return - - for i, (name, path) in enumerate(self.eval_sets, 1): - # Load test count - try: - with open(path) as f: - data = json.load(f) - test_count = len(data.get("evaluations", [])) - evaluator_count = len(data.get("evaluatorRefs", [])) - console.info(f"{i}. {name}") - console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") - console.info(f" File: {path.name}") - except Exception: - console.info(f"{i}. {name} (error loading)") - - def _list_evaluators(self) -> None: - """List available evaluators.""" - console.info("\nāš™ļø Available Evaluators:") - if not self.evaluators: - console.warning("No evaluators found") - return - - for i, (name, path) in enumerate(self.evaluators, 1): - try: - with open(path) as f: - data = json.load(f) - category = self._get_category_name(data.get("category", 0)) - type_name = self._get_type_name(data.get("type", 1)) - console.info(f"{i}. {name}") - console.info(f" Type: {category} | {type_name}") - console.info(f" File: {path.name}") - except Exception: - console.info(f"{i}. {name} (error loading)") - - def _list_eval_sets_navigation(self) -> None: - """List eval sets with navigation.""" - self._clear_screen() - console.info("šŸ“‹ Available Eval Sets") - console.info("─" * 65) - self._list_eval_sets() - console.info("\nāŒØļø Press any key to go back...") - self._get_key_input() - - def _list_evaluators_navigation(self) -> None: - """List evaluators with navigation.""" - self._clear_screen() - console.info("āš™ļø Available Evaluators") - console.info("─" * 65) - self._list_evaluators() - console.info("\nāŒØļø Press any key to go back...") - self._get_key_input() - - def _quick_run(self) -> None: - """Quick run with auto-selection.""" - if not self.eval_sets: - console.error("No eval sets found!") - return - - if not self.evaluators: - console.error("No evaluators found!") - return - - console.info("\n⚔ Quick Run:") - - # Auto-select first eval set - eval_name, eval_path = self.eval_sets[0] - console.info(f"šŸ“‹ Using eval set: {eval_name}") - - # Auto-select all evaluators - console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation(eval_path) - - def _quick_run_no_clear(self) -> None: - """Quick run without clearing screen.""" - if not self.eval_sets: - console.error("No eval sets found!") - input("\nPress Enter to continue...") - return - - if not self.evaluators: - console.error("No evaluators found!") - input("\nPress Enter to continue...") - return - - console.info("\n⚔ Quick Run:") - - # Auto-select first eval set - eval_name, eval_path = self.eval_sets[0] - console.info(f"šŸ“‹ Using eval set: {eval_name}") - - # Auto-select all evaluators - console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation_no_clear(eval_path) - - def _run_specific(self) -> None: - """Run with specific selection.""" - if not self.eval_sets or not self.evaluators: - console.error("Need both eval sets and evaluators!") - return - - # Select eval set with navigation - eval_choice = self._select_from_list(self.eval_sets, "Eval Set") - if eval_choice is None: - return - - eval_name, eval_path = self.eval_sets[eval_choice - 1] - 
console.success(f"Selected: {eval_name}") - - # Confirm and run - if self._confirm("Run evaluation now?"): - self._execute_evaluation(eval_path) - - def _run_specific_navigation(self) -> None: - """Run specific combination with navigation.""" - if not self.eval_sets or not self.evaluators: - console.error("Need both eval sets and evaluators!") - input("\nPress Enter to continue...") - return - - # Select eval set - self._clear_screen() - console.info("šŸŽÆ Select Evaluation Set") - console.info("─" * 65) - self._list_eval_sets() - - choice = input("\nāž¤ Select eval set number (or q to cancel): ").strip() - if choice.lower() == 'q': - return - - try: - eval_choice = int(choice) - if 1 <= eval_choice <= len(self.eval_sets): - eval_name, eval_path = self.eval_sets[eval_choice - 1] - console.success(f"Selected: {eval_name}") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation_no_clear(eval_path) - except ValueError: - console.error("Invalid selection") - input("\nPress Enter to continue...") - - def _execute_evaluation(self, eval_path: Path) -> None: - """Execute evaluation with live results.""" - console.info("\nšŸš€ Running evaluation...") - - # Find main.py - main_py = self._find_main_py() - if not main_py: - console.error("Could not find main.py") - return - - # Build command - run from the project directory - cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", - str(main_py.relative_to(self.project_root)), - str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" - ] - - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") - - try: - # Run with real-time output from project directory - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - universal_newlines=True, - cwd=self.project_root - ) - - # Stream output in real-time - if process.stdout: - for line in process.stdout: - print(line.rstrip()) - - process.wait() - - if process.returncode == 0: - console.success("\nāœ… Evaluation completed successfully!") - else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") - - except Exception as e: - console.error(f"Failed to run evaluation: {e}") - - def _execute_evaluation_no_clear(self, eval_path: Path) -> None: - """Execute evaluation without clearing screen.""" - console.info("\nšŸš€ Running evaluation...") - - # Find main.py - main_py = self._find_main_py() - if not main_py: - console.error("Could not find main.py") - input("\nPress Enter to continue...") - return - - # Build command - run from the project directory - cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", - str(main_py.relative_to(self.project_root)), - str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" - ] - - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") - - try: - # Run with real-time output from project directory - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - universal_newlines=True, - cwd=self.project_root - ) - - # Stream output in real-time - if process.stdout: - for line in process.stdout: - print(line.rstrip()) - - process.wait() - - if process.returncode == 0: - console.success("\nāœ… Evaluation completed successfully!") - else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") - - except Exception as e: - console.error(f"Failed to run evaluation: {e}") - - input("\nPress 
Enter to continue...") - - def _find_main_py(self) -> Optional[Path]: - """Find main.py file.""" - # Check current directory - main_py = self.project_root / "main.py" - if main_py.exists(): - return main_py - - # Check parent directories - for parent in self.project_root.parents: - main_py = parent / "main.py" - if main_py.exists(): - return main_py - - return None - - def _get_input(self, prompt: str) -> str: - """Get user input with prompt.""" - try: - return input(f"āž¤ {prompt}") - except KeyboardInterrupt: - raise - - def _select_from_list(self, items: List[Tuple[str, Path]], title: str) -> Optional[int]: - """Interactive list selection.""" - if not items: - console.warning(f"No {title.lower()} found") - return None - - console.info(f"\n{title}:") - for i, (name, _) in enumerate(items, 1): - console.info(f"{i}. {name}") - - try: - value = input(f"āž¤ {title} number: ") - num = int(value) - if 1 <= num <= len(items): - return num - else: - console.warning(f"Please enter a number between 1 and {len(items)}") - return None - except (ValueError, KeyboardInterrupt): - return None - - def _confirm(self, message: str) -> bool: - """Get yes/no confirmation.""" - response = self._get_input(f"{message} (y/n): ").lower() - return response in ['y', 'yes'] - - def _get_category_name(self, category: int) -> str: - """Get category name.""" - names = {0: "Deterministic", 1: "LLM Judge", 2: "Agent Scorer", 3: "Trajectory"} - return names.get(category, "Unknown") - - def _get_type_name(self, eval_type: int) -> str: - """Get type name.""" - names = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" - } - return names.get(eval_type, "Unknown") - - def _drill_down_eval_sets(self) -> None: - """Drill down into eval sets with navigation.""" - if not self.eval_sets: - self._show_no_items_screen("eval sets") - return - - current_selection = 0 - while True: - self._clear_screen() - console.info("šŸ“‹ Eval Sets - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") - console.info("─" * 65) - - for i, (name, path) in enumerate(self.eval_sets): - if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") - self._show_eval_set_preview(path) - else: - console.info(f" {i+1}. {name}") - - key = self._get_key_input() - - if key in ['q', 'Q', 'back']: - break - elif key == 'up': - current_selection = (current_selection - 1) % len(self.eval_sets) - elif key == 'down': - current_selection = (current_selection + 1) % len(self.eval_sets) - elif key in ['enter', ' ']: - self._show_eval_set_details(self.eval_sets[current_selection]) - elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): - current_selection = int(key) - 1 - - def _drill_down_evaluators(self) -> None: - """Drill down into evaluators with navigation.""" - if not self.evaluators: - self._show_no_items_screen("evaluators") - return - - current_selection = 0 - while True: - self._clear_screen() - console.info("āš™ļø Evaluators - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") - console.info("─" * 65) - - for i, (name, path) in enumerate(self.evaluators): - if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") - self._show_evaluator_preview(path) - else: - console.info(f" {i+1}. 
{name}") - - key = self._get_key_input() - - if key in ['q', 'Q', 'back']: - break - elif key == 'up': - current_selection = (current_selection - 1) % len(self.evaluators) - elif key == 'down': - current_selection = (current_selection + 1) % len(self.evaluators) - elif key in ['enter', ' ']: - self._show_evaluator_details(self.evaluators[current_selection]) - elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): - current_selection = int(key) - 1 - - def _show_no_items_screen(self, item_type: str) -> None: - """Show no items screen.""" - self._clear_screen() - console.warning(f"No {item_type} found!") - console.info("Press Enter to go back...") - self._get_input("") - - def _show_eval_set_preview(self, path: Path) -> None: - """Show eval set preview info.""" - try: - with open(path) as f: - data = json.load(f) - test_count = len(data.get("evaluations", [])) - evaluator_count = len(data.get("evaluatorRefs", [])) - console.info(f" šŸ“„ {path.name}") - console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") - except Exception: - console.info(f" šŸ“„ {path.name} (error loading)") - - def _show_evaluator_preview(self, path: Path) -> None: - """Show evaluator preview info.""" - try: - with open(path) as f: - data = json.load(f) - category = self._get_category_name(data.get("category", 0)) - type_name = self._get_type_name(data.get("type", 1)) - console.info(f" šŸ“„ {path.name}") - console.info(f" šŸŽÆ Type: {category} | {type_name}") - except Exception: - console.info(f" šŸ“„ {path.name} (error loading)") - - def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: - """Show detailed eval set view.""" - name, path = eval_set_tuple - self._clear_screen() - console.info(f"šŸ“‹ Eval Set Details: {name}") - console.info("─" * 65) - - try: - with open(path) as f: - data = json.load(f) - - console.info(f"šŸ“„ File: {path.name}") - console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") - console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") - console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") - console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") - console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") - - evaluator_refs = data.get('evaluatorRefs', []) - if evaluator_refs: - console.info("\nšŸŽÆ Evaluator References:") - for ref in evaluator_refs: - console.info(f" • {ref}") - - evaluations = data.get('evaluations', []) - if evaluations: - console.info("\nšŸ“ Test Cases:") - for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 - test_name = eval_data.get('name', f'Test {i}') - console.info(f" {i}. {test_name}") - if 'inputs' in eval_data: - inputs_preview = str(eval_data['inputs'])[:60] - if len(str(eval_data['inputs'])) > 60: - inputs_preview += "..." - console.info(f" Input: {inputs_preview}") - if 'expectedOutput' in eval_data: - output_preview = str(eval_data['expectedOutput'])[:60] - if len(str(eval_data['expectedOutput'])) > 60: - output_preview += "..." - console.info(f" Expected: {output_preview}") - - if len(evaluations) > 10: - console.info(f" ... 
and {len(evaluations) - 10} more tests") - - except Exception as e: - console.error(f"Error loading eval set: {e}") - - console.info("\nāŒØļø Press q/Backspace to go back...") - while True: - key = self._get_key_input() - if key in ['q', 'Q', 'back']: - break - - def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: - """Show detailed evaluator view.""" - name, path = evaluator_tuple - self._clear_screen() - console.info(f"āš™ļø Evaluator Details: {name}") - console.info("─" * 65) - - try: - with open(path) as f: - data = json.load(f) - - console.info(f"šŸ“„ File: {path.name}") - console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") - console.info(f"šŸ“ Description: {data.get('description', 'No description')}") - console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") - console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") - console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") - - if 'llmConfig' in data: - llm_config = data['llmConfig'] - console.info("\nšŸ¤– LLM Configuration:") - console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") - if 'prompt' in llm_config: - prompt_preview = llm_config['prompt'][:100] - if len(llm_config['prompt']) > 100: - prompt_preview += "..." - console.info(f" Prompt: {prompt_preview}") - - except Exception as e: - console.error(f"Error loading evaluator: {e}") - - console.info("\nāŒØļø Press q/Backspace to go back...") - while True: - key = self._get_key_input() - if key in ['q', 'Q', 'back']: - break - - def _create_eval_set(self) -> None: - """Create new evaluation set interactively.""" - console.info("\nāž• Create New Eval Set") - - name = self._get_input("Name: ") - if not name: - return - - # Create clean filename from name - filename = f"{name.lower().replace(' ', '_')}.json" - - # Create basic eval set - eval_set = { - "id": f"eval-{len(self.eval_sets) + 1}", - "fileName": filename, - "evaluatorRefs": [], - "name": name, - "batchSize": 10, - "timeoutMinutes": 20, - "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": [] - } - - # Ask if they want to add evaluations - add_evals = self._get_input("Add evaluations now? (y/n): ").lower() - if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) - - # Ensure evaluationSets directory exists - eval_sets_dir = self.project_root / "evaluationSets" - eval_sets_dir.mkdir(exist_ok=True) - - # Save file - file_path = eval_sets_dir / filename - - with open(file_path, 'w') as f: - json.dump(eval_set, f, indent=2) - - console.success(f"āœ… Created eval set: {filename}") - self._discover_files() # Refresh - - def _create_eval_set_interactive(self) -> None: - """Create new evaluation set with comprehensive questions.""" - self._clear_screen() - console.info("āž• Create New Eval Set - Interactive Wizard") - console.info("─" * 65) - - # Basic Information - console.info("šŸ“ Basic Information") - name = input("āž¤ Eval Set Name: ").strip() - if not name: - console.warning("Name is required!") - input("Press Enter to continue...") - return - - # Create clean filename from name - filename = f"{name.lower().replace(' ', '_')}.json" - - # Evaluator References - console.info("\nšŸŽÆ Evaluator References") - console.info("Available evaluators:") - for i, (eval_name, _) in enumerate(self.evaluators, 1): - console.info(f" {i}. 
{eval_name}") - - evaluator_refs = [] - if self.evaluators: - refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() - if refs_input.lower() == 'all': - evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] - elif refs_input: - try: - for num in refs_input.split(','): - idx = int(num.strip()) - 1 - if 0 <= idx < len(self.evaluators): - eval_path = self.evaluators[idx][1] - eval_id = self._get_evaluator_id(eval_path) - evaluator_refs.append(eval_id) - except ValueError: - console.warning("Invalid input, no evaluators selected") - - # Test Cases - console.info("\nšŸ“ Test Cases") - evaluations = [] - test_count = 1 - - while True: - console.info(f"\nTest Case #{test_count}") - test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() - if test_name.lower() == 'done': - break - - if not test_name: - console.warning("Test name is required!") - continue - - # Inputs - console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") - inputs_str = input("āž¤ Inputs: ").strip() - try: - inputs = json.loads(inputs_str) if inputs_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty inputs") - inputs = {} - - # Expected Output - console.info("šŸ“¤ Expected Output (JSON format)") - expected_str = input("āž¤ Expected Output: ").strip() - try: - expected_output = json.loads(expected_str) if expected_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty expected output") - expected_output = {} - - evaluation: Dict[str, Any] = { - "id": f"test-{test_count}", - "name": test_name, - "inputs": inputs, - "expectedOutput": expected_output, - "expectedAgentBehavior": "", - "simulationInstructions": "", - "simulateInput": False, - "inputGenerationInstructions": "", - "simulateTools": False, - "toolsToSimulate": [], - "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - evaluations.append(evaluation) - test_count += 1 - - if not evaluations: - console.warning("At least one test case is required!") - input("Press Enter to continue...") - return - - # Create eval set - eval_set = { - "id": f"eval-{len(self.eval_sets) + 1}", - "fileName": filename, - "evaluatorRefs": evaluator_refs, - "name": name, - "batchSize": 10, - "timeoutMinutes": 20, - "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": evaluations - } - - # Ensure evaluationSets directory exists - eval_sets_dir = self.project_root / "evaluationSets" - eval_sets_dir.mkdir(exist_ok=True) - - # Save file - file_path = eval_sets_dir / filename - - try: - with open(file_path, 'w') as f: - json.dump(eval_set, f, indent=2) - - console.success(f"\nāœ… Created eval set: {filename}") - console.info(f"šŸ“Š Tests: {len(evaluations)}") - console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") - - self._discover_files() # Refresh - except Exception as e: - console.error(f"Failed to create eval set: {e}") - - input("\nPress Enter to continue...") - - def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]]: - """Add evaluations interactively.""" - evaluations = [] - test_count = 1 - - while True: - console.info(f"\nTest Case #{test_count}") - test_name 
= self._get_input("Test Name (or 'done' to finish): ") - if test_name.lower() == 'done': - break - - if not test_name: - console.warning("Test name is required!") - continue - - # Simple inputs - console.info("Inputs (JSON format, e.g., {\"a\": 5, \"b\": 3})") - inputs_str = self._get_input("Inputs: ") - try: - inputs = json.loads(inputs_str) if inputs_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty inputs") - inputs = {} - - # Expected output - console.info("Expected Output (JSON format)") - expected_str = self._get_input("Expected Output: ") - try: - expected_output = json.loads(expected_str) if expected_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty expected output") - expected_output = {} - - evaluation: Dict[str, Any] = { - "id": f"test-{test_count}", - "name": test_name, - "inputs": inputs, - "expectedOutput": expected_output, - "expectedAgentBehavior": "", - "simulationInstructions": "", - "simulateInput": False, - "inputGenerationInstructions": "", - "simulateTools": False, - "toolsToSimulate": [], - "evalSetId": eval_set_id, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - evaluations.append(evaluation) - test_count += 1 - - return evaluations - - def _create_evaluator(self) -> None: - """Create new evaluator interactively.""" - console.info("\nāž• Create New Evaluator") - - # Select template - console.info("Templates:") - console.info("1. Exact Match") - console.info("2. JSON Similarity") - - template = self._get_number_input("Template (1-2): ", 1, 2) - if template is None: - return - - name = self._get_input("Name: ") - if not name: - return - - # Template configurations - if template == 1: - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": "Exact match evaluator", - "category": 0, - "type": 1, - "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - else: # JSON Similarity - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": "JSON similarity evaluator", - "category": 0, - "type": 6, - "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - - # Ensure evaluators directory exists - evaluators_dir = self.project_root / "evaluators" - evaluators_dir.mkdir(exist_ok=True) - - # Save file - filename = f"{name.lower().replace(' ', '_')}.json" - file_path = evaluators_dir / filename - - with open(file_path, 'w') as f: - json.dump(evaluator, f, indent=2) - - console.success(f"āœ… Created evaluator: {filename}") - self._discover_files() # Refresh - - def _create_evaluator_interactive(self) -> None: - """Create new evaluator with comprehensive questions.""" - self._clear_screen() - console.info("āž• Create New Evaluator - Interactive Wizard") - console.info("─" * 65) - - # Basic Information - console.info("šŸ“ Basic Information") - name = input("āž¤ Evaluator Name: ").strip() - if not name: - console.warning("Name is required!") - input("Press Enter to continue...") - return - - description = input("āž¤ Description: ").strip() or f"{name} evaluator" - - # Category Selection - console.info("\nšŸ·ļø Category Selection") - categories = { - 0: 
"Deterministic", - 1: "LLM as Judge", - 2: "Agent Scorer", - 3: "Trajectory" - } - - for key, value in categories.items(): - console.info(f" {key}. {value}") - - try: - category = int(input("āž¤ Select Category (0-3): ") or "0") - if category not in categories: - category = 0 - except ValueError: - category = 0 - - # Type Selection - console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") - types = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" - } - - # Show relevant types based on category - relevant_types = [] - if category == 0: # Deterministic - relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity - elif category == 1: # LLM as Judge - relevant_types = [4, 5] # Factuality, Custom - elif category == 3: # Trajectory - relevant_types = [7] # Trajectory - else: - relevant_types = list(types.keys()) - - for type_id in relevant_types: - console.info(f" {type_id}. {types[type_id]}") - - try: - eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) - if eval_type not in relevant_types: - eval_type = relevant_types[0] - except (ValueError, IndexError): - eval_type = 1 - - # Target Output Key - console.info("\nšŸ” Target Configuration") - console.info("Target Output Key determines which part of the output to evaluate") - console.info("Examples: '*' (all), 'result', 'answer', 'output'") - target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" - - # Create basic evaluator - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": description, - "category": category, - "type": eval_type, - "targetOutputKey": target_key, - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" - } - - # LLM Configuration (if LLM as Judge) - if category == 1: # LLM as Judge - console.info("\nšŸ¤– LLM Configuration") - model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" - - console.info("šŸ“ Evaluation Prompt") - console.info("This prompt will be used to evaluate the agent's output") - prompt = input("āž¤ Evaluation Prompt: ").strip() - - if prompt: - evaluator["llmConfig"] = { - "modelName": model_name, - "prompt": prompt, - "temperature": 0.0, - "maxTokens": 1000 - } - - # Ensure evaluators directory exists - evaluators_dir = self.project_root / "evaluators" - evaluators_dir.mkdir(exist_ok=True) - - # Save file - filename = f"{name.lower().replace(' ', '_')}.json" - file_path = evaluators_dir / filename - - try: - with open(file_path, 'w') as f: - json.dump(evaluator, f, indent=2) - - console.success(f"\nāœ… Created evaluator: {filename}") - console.info(f"šŸ·ļø Category: {categories[category]}") - console.info(f"šŸŽÆ Type: {types[eval_type]}") - console.info(f"šŸ” Target: {target_key}") - - self._discover_files() # Refresh - except Exception as e: - console.error(f"Failed to create evaluator: {e}") - - input("\nPress Enter to continue...") - - def _get_number_input(self, prompt: str, min_val: int, max_val: int) -> Optional[int]: - """Get number input with validation.""" - try: - value = input(f"āž¤ {prompt}") - num = int(value) - if min_val <= num <= max_val: - return num - else: - console.warning(f"Please enter a number between {min_val} and {max_val}") - return None - except (ValueError, KeyboardInterrupt): - return None - - def _get_evaluator_id(self, path: Path) -> str: - """Get evaluator ID from file.""" - try: - with 
open(path) as f: - data = json.load(f) - return data.get("id", path.stem) - except Exception: - return path.stem - - -def launch_interactive_cli(project_root: Optional[Path] = None) -> None: - """Launch the interactive CLI.""" - cli = InteractiveEvalCLI(project_root) - cli.run() diff --git a/src/uipath/_cli/_interactive/__init__.py b/src/uipath/_cli/_interactive/__init__.py new file mode 100644 index 000000000..3fe5a81ab --- /dev/null +++ b/src/uipath/_cli/_interactive/__init__.py @@ -0,0 +1,5 @@ +"""Interactive evaluation CLI module.""" + +from ._main import launch_interactive_cli + +__all__ = ["launch_interactive_cli"] diff --git a/src/uipath/_cli/_interactive/_discovery.py b/src/uipath/_cli/_interactive/_discovery.py new file mode 100644 index 000000000..b0b4a68b1 --- /dev/null +++ b/src/uipath/_cli/_interactive/_discovery.py @@ -0,0 +1,46 @@ +"""Discovery utilities for finding eval sets and evaluators.""" +# type: ignore + +import json +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + + +class DiscoveryMixin: + """Mixin for file discovery operations.""" + + def _discover_files(self: "InteractiveEvalCLI") -> None: + """Quickly discover eval sets and evaluators.""" + # Clear existing lists to avoid duplicates + self.eval_sets.clear() + self.evaluators.clear() + + # Find eval sets from evaluationSets folder + eval_sets_dir = self.project_root / "evaluationSets" + if eval_sets_dir.exists(): + for eval_file in eval_sets_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Check if it's an eval set by presence of "evaluations" array + if "evaluations" in data and isinstance(data.get("evaluations"), list): + name = data.get("name", eval_file.stem) + self.eval_sets.append((name, eval_file)) + except Exception: + pass + + # Find evaluators from evaluators folder + evaluators_dir = self.project_root / "evaluators" + if evaluators_dir.exists(): + for eval_file in evaluators_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Verify it has evaluator-specific fields + if "id" in data and "type" in data: + name = data.get("name", eval_file.stem) + self.evaluators.append((name, eval_file)) + except Exception: + pass diff --git a/src/uipath/_cli/_interactive/_drill_down.py b/src/uipath/_cli/_interactive/_drill_down.py new file mode 100644 index 000000000..6bdcf7453 --- /dev/null +++ b/src/uipath/_cli/_interactive/_drill_down.py @@ -0,0 +1,88 @@ +"""Drill-down navigation for eval sets and evaluators.""" +# type: ignore + +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class DrillDownMixin: + """Mixin for drill-down navigation operations.""" + + def _drill_down_eval_sets(self: "InteractiveEvalCLI") -> None: + """Drill down into eval sets with navigation.""" + if not self.eval_sets: + self._show_no_items_screen("eval sets") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("šŸ“‹ Eval Sets - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.eval_sets): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_eval_set_preview(path) + else: + console.info(f" {i+1}. 
{name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.eval_sets) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.eval_sets) + elif key in ['enter', ' ']: + self._show_eval_set_details(self.eval_sets[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): + current_selection = int(key) - 1 + + def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None: + """Drill down into evaluators with navigation.""" + if not self.evaluators: + self._show_no_items_screen("evaluators") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("āš™ļø Evaluators - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.evaluators): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_evaluator_preview(path) + else: + console.info(f" {i+1}. {name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.evaluators) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.evaluators) + elif key in ['enter', ' ']: + self._show_evaluator_details(self.evaluators[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): + current_selection = int(key) - 1 + + def _show_no_items_screen(self: "InteractiveEvalCLI", item_type: str) -> None: + """Show no items screen.""" + self._clear_screen() + console.warning(f"No {item_type} found!") + console.info("Press Enter to go back...") + self._get_input("") diff --git a/src/uipath/_cli/_interactive/_eval_sets.py b/src/uipath/_cli/_interactive/_eval_sets.py new file mode 100644 index 000000000..9f6382149 --- /dev/null +++ b/src/uipath/_cli/_interactive/_eval_sets.py @@ -0,0 +1,329 @@ +"""Eval set operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvalSetMixin: + """Mixin for eval set operations.""" + + def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set - simplified version.""" + self._clear_screen() + console.info("āž• Create New Eval Set") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Create basic eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": [], + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "evaluations": [] + } + + # Ask if they want to add evaluations + add_evals = self._get_input("Add evaluations now? 
(y/n): ").lower() + if add_evals in ['y', 'yes']: + eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"āœ… Created eval set: {filename}") + self._discover_files() # Refresh + + def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Eval Set - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Eval Set Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Evaluator References + console.info("\nšŸŽÆ Evaluator References") + console.info("Available evaluators:") + for i, (eval_name, _) in enumerate(self.evaluators, 1): + console.info(f" {i}. {eval_name}") + + evaluator_refs = [] + if self.evaluators: + refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() + if refs_input.lower() == 'all': + evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + elif refs_input: + try: + for num in refs_input.split(','): + idx = int(num.strip()) - 1 + if 0 <= idx < len(self.evaluators): + eval_path = self.evaluators[idx][1] + eval_id = self._get_evaluator_id(eval_path) + evaluator_refs.append(eval_id) + except ValueError: + console.warning("Invalid input, no evaluators selected") + + # Test Cases + console.info("\nšŸ“ Test Cases") + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": f"eval-{len(self.eval_sets) + 1}", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + evaluations.append(evaluation) + test_count += 1 + + if not evaluations: + console.warning("At least one test case is required!") + input("Press Enter to continue...") + return + + 
# Create eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": evaluator_refs, + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "evaluations": evaluations + } + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"\nāœ… Created eval set: {filename}") + console.info(f"šŸ“Š Tests: {len(evaluations)}") + console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create eval set: {e}") + + input("\nPress Enter to continue...") + + def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -> List[Dict[str, Any]]: + """Add evaluations interactively.""" + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = self._get_input("Test Name (or 'done' to finish): ") + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": eval_set_id, + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + evaluations.append(evaluation) + test_count += 1 + + return evaluations + + def _list_eval_sets(self: "InteractiveEvalCLI") -> None: + """List available eval sets.""" + console.info("\nšŸ“‹ Available Eval Sets:") + if not self.eval_sets: + console.warning("No eval sets found") + return + + for i, (name, path) in enumerate(self.eval_sets, 1): + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f"{i}. {name}") + console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. 
{name} (error loading)") + + def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show eval set preview info.""" + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path]) -> None: + """Show detailed eval set view.""" + name, path = eval_set_tuple + self._clear_screen() + console.info(f"šŸ“‹ Eval Set Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") + console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") + console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") + console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") + + evaluator_refs = data.get('evaluatorRefs', []) + if evaluator_refs: + console.info("\nšŸŽÆ Evaluator References:") + for ref in evaluator_refs: + console.info(f" • {ref}") + + evaluations = data.get('evaluations', []) + if evaluations: + console.info("\nšŸ“ Test Cases:") + for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 + test_name = eval_data.get('name', f'Test {i}') + console.info(f" {i}. {test_name}") + if 'inputs' in eval_data: + inputs_preview = str(eval_data['inputs'])[:60] + if len(str(eval_data['inputs'])) > 60: + inputs_preview += "..." + console.info(f" Input: {inputs_preview}") + if 'expectedOutput' in eval_data: + output_preview = str(eval_data['expectedOutput'])[:60] + if len(str(eval_data['expectedOutput'])) > 60: + output_preview += "..." + console.info(f" Expected: {output_preview}") + + if len(evaluations) > 10: + console.info(f"\n ... 
and {len(evaluations) - 10} more tests") + + except Exception as e: + console.error(f"Error loading eval set: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() diff --git a/src/uipath/_cli/_interactive/_evaluators.py b/src/uipath/_cli/_interactive/_evaluators.py new file mode 100644 index 000000000..bb2968569 --- /dev/null +++ b/src/uipath/_cli/_interactive/_evaluators.py @@ -0,0 +1,273 @@ +"""Evaluator operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvaluatorMixin: + """Mixin for evaluator operations.""" + + def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluator - simplified version.""" + self._clear_screen() + console.info("āž• Create New Evaluator") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": f"{name} evaluator", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"āœ… Created evaluator: {filename}") + self._discover_files() # Refresh + + def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluator with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Evaluator - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Evaluator Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + description = input("āž¤ Description: ").strip() or f"{name} evaluator" + + # Category Selection + console.info("\nšŸ·ļø Category Selection") + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + + for key, value in categories.items(): + console.info(f" {key}. {value}") + + try: + category = int(input("āž¤ Select Category (0-3): ") or "0") + if category not in categories: + category = 0 + except ValueError: + category = 0 + + # Type Selection + console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") + types = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + + # Show relevant types based on category + relevant_types = [] + if category == 0: # Deterministic + relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + elif category == 1: # LLM as Judge + relevant_types = [4, 5] # Factuality, Custom + elif category == 3: # Trajectory + relevant_types = [7] # Trajectory + else: + relevant_types = list(types.keys()) + + for type_id in relevant_types: + console.info(f" {type_id}. 
{types[type_id]}") + + try: + eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + if eval_type not in relevant_types: + eval_type = relevant_types[0] + except (ValueError, IndexError): + eval_type = 1 + + # Target Output Key + console.info("\nšŸ” Target Configuration") + console.info("Target Output Key determines which part of the output to evaluate") + console.info("Examples: '*' (all), 'result', 'answer', 'output'") + target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": description, + "category": category, + "type": eval_type, + "targetOutputKey": target_key, + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + + # LLM Configuration (if LLM as Judge) + if category == 1: # LLM as Judge + console.info("\nšŸ¤– LLM Configuration") + model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" + + console.info("šŸ“ Evaluation Prompt") + console.info("This prompt will be used to evaluate the agent's output") + prompt = input("āž¤ Evaluation Prompt: ").strip() + + if prompt: + evaluator["llmConfig"] = { + "modelName": model_name, + "prompt": prompt, + "temperature": 0.0, + "maxTokens": 1000 + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"\nāœ… Created evaluator: {filename}") + console.info(f"šŸ·ļø Category: {categories[category]}") + console.info(f"šŸŽÆ Type: {types[eval_type]}") + console.info(f"šŸ” Target: {target_key}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create evaluator: {e}") + + input("\nPress Enter to continue...") + + def _list_evaluators(self: "InteractiveEvalCLI") -> None: + """List available evaluators.""" + console.info("\nāš™ļø Available Evaluators:") + if not self.evaluators: + console.warning("No evaluators found") + return + + for i, (name, path) in enumerate(self.evaluators, 1): + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f"{i}. {name}") + console.info(f" Type: {category} | {type_name}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. 
{name} (error loading)") + + def _show_evaluator_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show evaluator preview info.""" + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸŽÆ Type: {category} | {type_name}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path]) -> None: + """Show detailed evaluator view.""" + name, path = evaluator_tuple + self._clear_screen() + console.info(f"āš™ļø Evaluator Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“ Description: {data.get('description', 'No description')}") + console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") + console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") + + if 'llmConfig' in data: + llm_config = data['llmConfig'] + console.info("\nšŸ¤– LLM Configuration:") + console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") + if 'prompt' in llm_config: + prompt_preview = llm_config['prompt'][:100] + if len(llm_config['prompt']) > 100: + prompt_preview += "..." + console.info(f" Prompt: {prompt_preview}") + + except Exception as e: + console.error(f"Error loading evaluator: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() + + def _get_category_name(self: "InteractiveEvalCLI", category: int) -> str: + """Get category name from number.""" + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + return categories.get(category, "Unknown") + + def _get_type_name(self: "InteractiveEvalCLI", eval_type: int) -> str: + """Get type name from number.""" + types = { + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory" + } + return types.get(eval_type, "Unknown") + + def _get_evaluator_id(self: "InteractiveEvalCLI", path: Path) -> str: + """Get evaluator ID from file.""" + try: + with open(path) as f: + data = json.load(f) + return data.get("id", path.stem) + except Exception: + return path.stem diff --git a/src/uipath/_cli/_interactive/_execution.py b/src/uipath/_cli/_interactive/_execution.py new file mode 100644 index 000000000..f2283bc2b --- /dev/null +++ b/src/uipath/_cli/_interactive/_execution.py @@ -0,0 +1,135 @@ +"""Execution utilities for running evaluations.""" +# type: ignore + +import subprocess +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class ExecutionMixin: + """Mixin for execution operations.""" + + def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: + """Execute evaluation with live results.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + 
str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> None: + """Execute evaluation without clearing screen.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + input("\nPress Enter to continue...") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + input("\nPress Enter to continue...") + + def _find_main_py(self: "InteractiveEvalCLI") -> Optional[Path]: + """Find main.py file.""" + # Check current directory + main_py = self.project_root / "main.py" + if main_py.exists(): + return main_py + + # Check parent directories + for parent in self.project_root.parents: + main_py = parent / "main.py" + if main_py.exists(): + return main_py + + return None + + def _confirm(self: "InteractiveEvalCLI", prompt: str) -> bool: + """Ask for confirmation.""" + response = self._get_input(f"{prompt} (y/n): ").lower() + return response in ['y', 'yes'] diff --git a/src/uipath/_cli/_interactive/_main.py b/src/uipath/_cli/_interactive/_main.py new file mode 100644 index 000000000..c15afc2e0 --- /dev/null +++ b/src/uipath/_cli/_interactive/_main.py @@ -0,0 +1,193 @@ +"""Main interactive CLI for evaluations.""" + +from pathlib import Path +from typing import List, Optional, Tuple + +from .._utils._console import ConsoleLogger +from ._discovery import DiscoveryMixin +from ._drill_down import DrillDownMixin +from ._eval_sets import EvalSetMixin +from ._evaluators import EvaluatorMixin +from ._execution import ExecutionMixin +from ._navigation import HAS_NAVIGATION, NavigationMixin + +console = ConsoleLogger() + + +class InteractiveEvalCLI( + NavigationMixin, + DiscoveryMixin, + EvalSetMixin, + EvaluatorMixin, + ExecutionMixin, + DrillDownMixin +): + 
"""Simple, fast, keyboard-driven evaluation CLI.""" + + def __init__(self, project_root: Optional[Path] = None): + self.project_root = project_root or Path.cwd() + self.eval_sets: List[Tuple[str, Path]] = [] + self.evaluators: List[Tuple[str, Path]] = [] + self.current_selection = 0 + self.menu_items = [ + "šŸ“‹ List eval sets", + "āš™ļø List evaluators", + "⚔ Quick run (auto-select)", + "āž• Create eval set", + "āž• Create evaluator", + "šŸŽÆ Run specific combination" + ] + self._discover_files() + + def run(self) -> None: + """Run the interactive CLI.""" + self._show_ascii_art() + + if not HAS_NAVIGATION: + console.warning("āš ļø Terminal navigation not available. Using fallback mode.") + console.info("Consider using a standard terminal for better experience.\n") + self._run_fallback_mode() + return + + try: + self._run_navigation_mode() + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + + def _run_navigation_mode(self) -> None: + """Run with arrow key navigation.""" + while True: + self._clear_screen() + self._show_ascii_art() + self._show_menu(self.current_selection, self.menu_items) + + key = self._get_key_input() + + if key == 'up': + self.current_selection = (self.current_selection - 1) % len(self.menu_items) + elif key == 'down': + self.current_selection = (self.current_selection + 1) % len(self.menu_items) + elif key in ['enter', ' ']: + self._execute_menu_item_with_navigation(self.current_selection) + elif key.isdigit() and 1 <= int(key) <= 6: + self._execute_menu_item_with_navigation(int(key) - 1) + + def _execute_menu_item_with_navigation(self, index: int) -> None: + """Execute menu item with navigation support.""" + if index == 0: + self._drill_down_eval_sets() + elif index == 1: + self._drill_down_evaluators() + elif index == 2: + self._quick_run_with_navigation() + elif index == 3: + self._create_eval_set_interactive() + elif index == 4: + self._create_evaluator_interactive() + elif index == 5: + self._run_specific_combination() + + def _run_fallback_mode(self) -> None: + """Run without navigation - simple text interface.""" + while True: + console.info("\nāš™ļø Main Menu:") + for i, item in enumerate(self.menu_items, 1): + console.info(f" {i}. {item}") + console.info(" 0. 
Exit") + + try: + choice = input("\nāž¤ Select option: ").strip() + + if choice == '0': + console.info("šŸ‘‹ Goodbye!") + break + elif choice == '1': + self._list_eval_sets_navigation() + elif choice == '2': + self._list_evaluators() + elif choice == '3': + self._quick_run() + elif choice == '4': + self._create_eval_set_simple() + elif choice == '5': + self._create_evaluator_simple() + elif choice == '6': + self._run_specific_combination() + else: + console.warning("Invalid option") + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + + def _quick_run_with_navigation(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + self._clear_screen() + console.warning("No eval sets found!") + console.info("Press Enter to go back...") + self._get_input("") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + + self._clear_screen() + console.info(f"⚔ Quick Run: {eval_name}") + console.info("─" * 65) + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + + def _quick_run(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + console.warning("No eval sets found!") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"\n⚔ Quick Run: {eval_name}") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _list_eval_sets_navigation(self) -> None: + """List eval sets with navigation.""" + self._clear_screen() + console.info("šŸ“‹ Available Eval Sets") + console.info("─" * 65) + self._list_eval_sets() + input("\nPress Enter to continue...") + + def _run_specific_combination(self) -> None: + """Run specific eval set and evaluator combination.""" + self._clear_screen() + console.info("šŸŽÆ Run Specific Combination") + console.info("─" * 65) + + # Select eval set + console.info("\nšŸ“‹ Select Eval Set:") + for i, (name, _) in enumerate(self.eval_sets, 1): + console.info(f" {i}. 
{name}") + + try: + eval_idx = int(input("\nāž¤ Eval Set Number: ").strip()) - 1 + if not (0 <= eval_idx < len(self.eval_sets)): + console.error("Invalid selection") + input("\nPress Enter to continue...") + return + + eval_name, eval_path = self.eval_sets[eval_idx] + + console.info(f"\nāœ… Selected: {eval_name}") + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + except ValueError: + console.error("Invalid selection") + input("\nPress Enter to continue...") + + +def launch_interactive_cli(project_root: Optional[Path] = None) -> None: + """Launch the interactive CLI.""" + cli = InteractiveEvalCLI(project_root) + cli.run() diff --git a/src/uipath/_cli/_interactive/_navigation.py b/src/uipath/_cli/_interactive/_navigation.py new file mode 100644 index 000000000..66514716b --- /dev/null +++ b/src/uipath/_cli/_interactive/_navigation.py @@ -0,0 +1,109 @@ +"""Navigation and input handling for interactive CLI.""" + +import sys +import termios +import tty + +from .._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +def has_termios() -> bool: + """Check if we have termios support for advanced input.""" + try: + termios.tcgetattr(sys.stdin) + return True + except Exception: + return False + + +HAS_NAVIGATION = has_termios() + + +class NavigationMixin: + """Mixin for navigation and input handling.""" + + def _clear_screen(self) -> None: + """Clear the screen.""" + print("\033[2J\033[H", end="") + + def _get_input(self, prompt: str) -> str: + """Get input from user.""" + return input(prompt).strip() + + def _get_key_input(self) -> str: + """Get key input with arrow key support.""" + if not HAS_NAVIGATION: + return input("āž¤ ").strip().lower() + + old_settings = termios.tcgetattr(sys.stdin) + try: + tty.setraw(sys.stdin) + + # Read first character + char = sys.stdin.read(1) + + # Check for escape sequences (arrow keys) + if char == '\x1b': # ESC + next_char = sys.stdin.read(1) + if next_char == '[': + arrow = sys.stdin.read(1) + if arrow == 'A': + return 'up' + elif arrow == 'B': + return 'down' + return '' + + # Backspace handling + if char == '\x7f': # Backspace (DEL) + return 'back' + elif char == '\x08': # Backspace (BS) + return 'back' + + # Enter key + if char in ['\r', '\n']: + return 'enter' + + # Digit keys + elif char.isdigit() and 1 <= int(char) <= 6: + return char + elif char == '\x03': # Ctrl+C + raise KeyboardInterrupt + + return '' + except Exception: + return input("āž¤ ").strip().lower() + finally: + # Restore terminal settings + try: + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) + except Exception: + pass + + def _show_ascii_art(self) -> None: + """Display ASCII art banner.""" + art = """ + ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ + ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• + + Evaluation Builder + Interactive Evaluation Toolkit + """ + console.info(art) + + def _show_menu(self, current_selection: int, menu_items: list[str]) -> None: + """Show 
menu with current selection highlighted.""" + console.info("\nāš™ļø Main Menu:") + console.info("─" * 65) + for i, item in enumerate(menu_items): + if i == current_selection: + console.info(f" ā–¶ {item}") + else: + console.info(f" {item}") + console.info("\nšŸ’” Use ↑/↓ arrows to navigate, Enter to select, or type 1-6") + console.info("Press Ctrl+C to exit") diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 70debd662..546d6dd63 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -139,7 +139,7 @@ def eval( # Handle interactive mode if interactive: try: - from ._eval_interactive import launch_interactive_cli + from ._interactive import launch_interactive_cli launch_interactive_cli() return except ImportError as e: From d18dd75ebeef81d156c4c6f143378a1a76f15479 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 21:03:17 -0700 Subject: [PATCH 6/6] fix: apply ruff formatting to interactive module - Formatted all files in _interactive/ module with ruff - Ensures consistent code style across the codebase --- src/uipath/_cli/_interactive/_discovery.py | 4 +- src/uipath/_cli/_interactive/_drill_down.py | 32 ++++---- src/uipath/_cli/_interactive/_eval_sets.py | 84 +++++++++++++-------- src/uipath/_cli/_interactive/_evaluators.py | 64 ++++++++++------ src/uipath/_cli/_interactive/_execution.py | 42 ++++++++--- src/uipath/_cli/_interactive/_main.py | 36 +++++---- src/uipath/_cli/_interactive/_navigation.py | 30 ++++---- src/uipath/_cli/cli_eval.py | 5 +- 8 files changed, 184 insertions(+), 113 deletions(-) diff --git a/src/uipath/_cli/_interactive/_discovery.py b/src/uipath/_cli/_interactive/_discovery.py index b0b4a68b1..08ea55d84 100644 --- a/src/uipath/_cli/_interactive/_discovery.py +++ b/src/uipath/_cli/_interactive/_discovery.py @@ -25,7 +25,9 @@ def _discover_files(self: "InteractiveEvalCLI") -> None: with open(eval_file) as f: data = json.load(f) # Check if it's an eval set by presence of "evaluations" array - if "evaluations" in data and isinstance(data.get("evaluations"), list): + if "evaluations" in data and isinstance( + data.get("evaluations"), list + ): name = data.get("name", eval_file.stem) self.eval_sets.append((name, eval_file)) except Exception: diff --git a/src/uipath/_cli/_interactive/_drill_down.py b/src/uipath/_cli/_interactive/_drill_down.py index 6bdcf7453..a200054b0 100644 --- a/src/uipath/_cli/_interactive/_drill_down.py +++ b/src/uipath/_cli/_interactive/_drill_down.py @@ -24,25 +24,27 @@ def _drill_down_eval_sets(self: "InteractiveEvalCLI") -> None: while True: self._clear_screen() console.info("šŸ“‹ Eval Sets - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) console.info("─" * 65) for i, (name, path) in enumerate(self.eval_sets): if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") + console.info(f"ā–ŗ {i + 1}. {name} ā—„") self._show_eval_set_preview(path) else: - console.info(f" {i+1}. {name}") + console.info(f" {i + 1}. 
{name}") key = self._get_key_input() - if key in ['q', 'Q', 'back']: + if key in ["q", "Q", "back"]: break - elif key == 'up': + elif key == "up": current_selection = (current_selection - 1) % len(self.eval_sets) - elif key == 'down': + elif key == "down": current_selection = (current_selection + 1) % len(self.eval_sets) - elif key in ['enter', ' ']: + elif key in ["enter", " "]: self._show_eval_set_details(self.eval_sets[current_selection]) elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): current_selection = int(key) - 1 @@ -57,25 +59,27 @@ def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None: while True: self._clear_screen() console.info("āš™ļø Evaluators - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) console.info("─" * 65) for i, (name, path) in enumerate(self.evaluators): if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") + console.info(f"ā–ŗ {i + 1}. {name} ā—„") self._show_evaluator_preview(path) else: - console.info(f" {i+1}. {name}") + console.info(f" {i + 1}. {name}") key = self._get_key_input() - if key in ['q', 'Q', 'back']: + if key in ["q", "Q", "back"]: break - elif key == 'up': + elif key == "up": current_selection = (current_selection - 1) % len(self.evaluators) - elif key == 'down': + elif key == "down": current_selection = (current_selection + 1) % len(self.evaluators) - elif key in ['enter', ' ']: + elif key in ["enter", " "]: self._show_evaluator_details(self.evaluators[current_selection]) elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): current_selection = int(key) - 1 diff --git a/src/uipath/_cli/_interactive/_eval_sets.py b/src/uipath/_cli/_interactive/_eval_sets.py index 9f6382149..2ac1da8df 100644 --- a/src/uipath/_cli/_interactive/_eval_sets.py +++ b/src/uipath/_cli/_interactive/_eval_sets.py @@ -39,15 +39,17 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": [] + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": [], } # Ask if they want to add evaluations add_evals = self._get_input("Add evaluations now? 
(y/n): ").lower() - if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) + if add_evals in ["y", "yes"]: + eval_set["evaluations"] = self._add_evaluations_interactive( + str(eval_set["id"]) + ) # Ensure evaluationSets directory exists eval_sets_dir = self.project_root / "evaluationSets" @@ -56,7 +58,7 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: # Save file file_path = eval_sets_dir / filename - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(eval_set, f, indent=2) console.success(f"āœ… Created eval set: {filename}") @@ -87,12 +89,16 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: evaluator_refs = [] if self.evaluators: - refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() - if refs_input.lower() == 'all': - evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + refs_input = input( + "āž¤ Select evaluators (comma-separated numbers, or 'all'): " + ).strip() + if refs_input.lower() == "all": + evaluator_refs = [ + self._get_evaluator_id(path) for eval_name, path in self.evaluators + ] elif refs_input: try: - for num in refs_input.split(','): + for num in refs_input.split(","): idx = int(num.strip()) - 1 if 0 <= idx < len(self.evaluators): eval_path = self.evaluators[idx][1] @@ -109,7 +115,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: while True: console.info(f"\nTest Case #{test_count}") test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() - if test_name.lower() == 'done': + if test_name.lower() == "done": break if not test_name: @@ -118,7 +124,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: # Inputs console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') inputs_str = input("āž¤ Inputs: ").strip() try: inputs = json.loads(inputs_str) if inputs_str else {} @@ -147,8 +153,12 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: "simulateTools": False, "toolsToSimulate": [], "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), } evaluations.append(evaluation) test_count += 1 @@ -167,9 +177,9 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": evaluations + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": evaluations, } # Ensure evaluationSets directory exists @@ -180,7 +190,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: file_path = eval_sets_dir / filename try: - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(eval_set, f, indent=2) console.success(f"\nāœ… Created eval set: {filename}") @@ -193,7 +203,9 @@ def 
_create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: input("\nPress Enter to continue...") - def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -> List[Dict[str, Any]]: + def _add_evaluations_interactive( + self: "InteractiveEvalCLI", eval_set_id: str + ) -> List[Dict[str, Any]]: """Add evaluations interactively.""" evaluations = [] test_count = 1 @@ -201,7 +213,7 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - while True: console.info(f"\nTest Case #{test_count}") test_name = self._get_input("Test Name (or 'done' to finish): ") - if test_name.lower() == 'done': + if test_name.lower() == "done": break if not test_name: @@ -210,7 +222,7 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - # Inputs console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') inputs_str = input("āž¤ Inputs: ").strip() try: inputs = json.loads(inputs_str) if inputs_str else {} @@ -239,8 +251,12 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - "simulateTools": False, "toolsToSimulate": [], "evalSetId": eval_set_id, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), } evaluations.append(evaluation) test_count += 1 @@ -278,7 +294,9 @@ def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None: except Exception: console.info(f" šŸ“„ {path.name} (error loading)") - def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path]) -> None: + def _show_eval_set_details( + self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path] + ) -> None: """Show detailed eval set view.""" name, path = eval_set_tuple self._clear_screen() @@ -296,26 +314,26 @@ def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") - evaluator_refs = data.get('evaluatorRefs', []) + evaluator_refs = data.get("evaluatorRefs", []) if evaluator_refs: console.info("\nšŸŽÆ Evaluator References:") for ref in evaluator_refs: console.info(f" • {ref}") - evaluations = data.get('evaluations', []) + evaluations = data.get("evaluations", []) if evaluations: console.info("\nšŸ“ Test Cases:") for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 - test_name = eval_data.get('name', f'Test {i}') + test_name = eval_data.get("name", f"Test {i}") console.info(f" {i}. {test_name}") - if 'inputs' in eval_data: - inputs_preview = str(eval_data['inputs'])[:60] - if len(str(eval_data['inputs'])) > 60: + if "inputs" in eval_data: + inputs_preview = str(eval_data["inputs"])[:60] + if len(str(eval_data["inputs"])) > 60: inputs_preview += "..." console.info(f" Input: {inputs_preview}") - if 'expectedOutput' in eval_data: - output_preview = str(eval_data['expectedOutput'])[:60] - if len(str(eval_data['expectedOutput'])) > 60: + if "expectedOutput" in eval_data: + output_preview = str(eval_data["expectedOutput"])[:60] + if len(str(eval_data["expectedOutput"])) > 60: output_preview += "..." 
console.info(f" Expected: {output_preview}") diff --git a/src/uipath/_cli/_interactive/_evaluators.py b/src/uipath/_cli/_interactive/_evaluators.py index bb2968569..541a5bbf1 100644 --- a/src/uipath/_cli/_interactive/_evaluators.py +++ b/src/uipath/_cli/_interactive/_evaluators.py @@ -35,8 +35,8 @@ def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: "category": 0, "type": 1, "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), } # Ensure evaluators directory exists @@ -47,7 +47,7 @@ def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: filename = f"{name.lower().replace(' ', '_')}.json" file_path = evaluators_dir / filename - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(evaluator, f, indent=2) console.success(f"āœ… Created evaluator: {filename}") @@ -75,7 +75,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: 0: "Deterministic", 1: "LLM as Judge", 2: "Agent Scorer", - 3: "Trajectory" + 3: "Trajectory", } for key, value in categories.items(): @@ -91,14 +91,25 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: # Type Selection console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") types = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory", } # Show relevant types based on category relevant_types = [] if category == 0: # Deterministic - relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + relevant_types = [ + 1, + 2, + 3, + 6, + ] # Exact Match, Contains, Regex, JSON Similarity elif category == 1: # LLM as Judge relevant_types = [4, 5] # Factuality, Custom elif category == 3: # Trajectory @@ -110,7 +121,10 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: console.info(f" {type_id}. 
{types[type_id]}") try: - eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + eval_type = int( + input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") + or str(relevant_types[0]) + ) if eval_type not in relevant_types: eval_type = relevant_types[0] except (ValueError, IndexError): @@ -118,7 +132,9 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: # Target Output Key console.info("\nšŸ” Target Configuration") - console.info("Target Output Key determines which part of the output to evaluate") + console.info( + "Target Output Key determines which part of the output to evaluate" + ) console.info("Examples: '*' (all), 'result', 'answer', 'output'") target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" @@ -130,8 +146,8 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: "category": category, "type": eval_type, "targetOutputKey": target_key, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), } # LLM Configuration (if LLM as Judge) @@ -148,7 +164,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: "modelName": model_name, "prompt": prompt, "temperature": 0.0, - "maxTokens": 1000 + "maxTokens": 1000, } # Ensure evaluators directory exists @@ -160,7 +176,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: file_path = evaluators_dir / filename try: - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(evaluator, f, indent=2) console.success(f"\nāœ… Created evaluator: {filename}") @@ -205,7 +221,9 @@ def _show_evaluator_preview(self: "InteractiveEvalCLI", path: Path) -> None: except Exception: console.info(f" šŸ“„ {path.name} (error loading)") - def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path]) -> None: + def _show_evaluator_details( + self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path] + ) -> None: """Show detailed evaluator view.""" name, path = evaluator_tuple self._clear_screen() @@ -219,17 +237,19 @@ def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[s console.info(f"\nšŸ“„ {path.name}") console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") console.info(f"šŸ“ Description: {data.get('description', 'No description')}") - console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info( + f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}" + ) console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") - if 'llmConfig' in data: - llm_config = data['llmConfig'] + if "llmConfig" in data: + llm_config = data["llmConfig"] console.info("\nšŸ¤– LLM Configuration:") console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") - if 'prompt' in llm_config: - prompt_preview = llm_config['prompt'][:100] - if len(llm_config['prompt']) > 100: + if "prompt" in llm_config: + prompt_preview = llm_config["prompt"][:100] + if len(llm_config["prompt"]) > 100: prompt_preview += "..." 
console.info(f" Prompt: {prompt_preview}") @@ -245,7 +265,7 @@ def _get_category_name(self: "InteractiveEvalCLI", category: int) -> str: 0: "Deterministic", 1: "LLM as Judge", 2: "Agent Scorer", - 3: "Trajectory" + 3: "Trajectory", } return categories.get(category, "Unknown") @@ -259,7 +279,7 @@ def _get_type_name(self: "InteractiveEvalCLI", eval_type: int) -> str: 4: "Factuality", 5: "Custom", 6: "JSON Similarity", - 7: "Trajectory" + 7: "Trajectory", } return types.get(eval_type, "Unknown") diff --git a/src/uipath/_cli/_interactive/_execution.py b/src/uipath/_cli/_interactive/_execution.py index f2283bc2b..8152fb3a5 100644 --- a/src/uipath/_cli/_interactive/_execution.py +++ b/src/uipath/_cli/_interactive/_execution.py @@ -29,13 +29,19 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: # Build command - run from the project directory cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", + sys.executable, + "-m", + "uipath._cli.cli_eval", str(main_py.relative_to(self.project_root)), str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" + "--no-report", + "--workers", + "1", ] - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) try: # Run with real-time output from project directory @@ -46,7 +52,7 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: text=True, bufsize=1, universal_newlines=True, - cwd=self.project_root + cwd=self.project_root, ) # Stream output in real-time @@ -59,12 +65,16 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: if process.returncode == 0: console.success("\nāœ… Evaluation completed successfully!") else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + console.error( + f"\nāŒ Evaluation failed (exit code: {process.returncode})" + ) except Exception as e: console.error(f"Failed to run evaluation: {e}") - def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> None: + def _execute_evaluation_no_clear( + self: "InteractiveEvalCLI", eval_path: Path + ) -> None: """Execute evaluation without clearing screen.""" console.info("\nšŸš€ Running evaluation...") @@ -77,13 +87,19 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> # Build command - run from the project directory cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", + sys.executable, + "-m", + "uipath._cli.cli_eval", str(main_py.relative_to(self.project_root)), str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" + "--no-report", + "--workers", + "1", ] - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) try: # Run with real-time output from project directory @@ -94,7 +110,7 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> text=True, bufsize=1, universal_newlines=True, - cwd=self.project_root + cwd=self.project_root, ) # Stream output in real-time @@ -107,7 +123,9 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> if process.returncode == 0: console.success("\nāœ… Evaluation completed successfully!") else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + console.error( + f"\nāŒ Evaluation failed (exit code: 
{process.returncode})" + ) except Exception as e: console.error(f"Failed to run evaluation: {e}") @@ -132,4 +150,4 @@ def _find_main_py(self: "InteractiveEvalCLI") -> Optional[Path]: def _confirm(self: "InteractiveEvalCLI", prompt: str) -> bool: """Ask for confirmation.""" response = self._get_input(f"{prompt} (y/n): ").lower() - return response in ['y', 'yes'] + return response in ["y", "yes"] diff --git a/src/uipath/_cli/_interactive/_main.py b/src/uipath/_cli/_interactive/_main.py index c15afc2e0..c41f4023a 100644 --- a/src/uipath/_cli/_interactive/_main.py +++ b/src/uipath/_cli/_interactive/_main.py @@ -20,7 +20,7 @@ class InteractiveEvalCLI( EvalSetMixin, EvaluatorMixin, ExecutionMixin, - DrillDownMixin + DrillDownMixin, ): """Simple, fast, keyboard-driven evaluation CLI.""" @@ -35,7 +35,7 @@ def __init__(self, project_root: Optional[Path] = None): "⚔ Quick run (auto-select)", "āž• Create eval set", "āž• Create evaluator", - "šŸŽÆ Run specific combination" + "šŸŽÆ Run specific combination", ] self._discover_files() @@ -44,7 +44,9 @@ def run(self) -> None: self._show_ascii_art() if not HAS_NAVIGATION: - console.warning("āš ļø Terminal navigation not available. Using fallback mode.") + console.warning( + "āš ļø Terminal navigation not available. Using fallback mode." + ) console.info("Consider using a standard terminal for better experience.\n") self._run_fallback_mode() return @@ -63,11 +65,15 @@ def _run_navigation_mode(self) -> None: key = self._get_key_input() - if key == 'up': - self.current_selection = (self.current_selection - 1) % len(self.menu_items) - elif key == 'down': - self.current_selection = (self.current_selection + 1) % len(self.menu_items) - elif key in ['enter', ' ']: + if key == "up": + self.current_selection = (self.current_selection - 1) % len( + self.menu_items + ) + elif key == "down": + self.current_selection = (self.current_selection + 1) % len( + self.menu_items + ) + elif key in ["enter", " "]: self._execute_menu_item_with_navigation(self.current_selection) elif key.isdigit() and 1 <= int(key) <= 6: self._execute_menu_item_with_navigation(int(key) - 1) @@ -98,20 +104,20 @@ def _run_fallback_mode(self) -> None: try: choice = input("\nāž¤ Select option: ").strip() - if choice == '0': + if choice == "0": console.info("šŸ‘‹ Goodbye!") break - elif choice == '1': + elif choice == "1": self._list_eval_sets_navigation() - elif choice == '2': + elif choice == "2": self._list_evaluators() - elif choice == '3': + elif choice == "3": self._quick_run() - elif choice == '4': + elif choice == "4": self._create_eval_set_simple() - elif choice == '5': + elif choice == "5": self._create_evaluator_simple() - elif choice == '6': + elif choice == "6": self._run_specific_combination() else: console.warning("Invalid option") diff --git a/src/uipath/_cli/_interactive/_navigation.py b/src/uipath/_cli/_interactive/_navigation.py index 66514716b..4f8077ca0 100644 --- a/src/uipath/_cli/_interactive/_navigation.py +++ b/src/uipath/_cli/_interactive/_navigation.py @@ -45,33 +45,33 @@ def _get_key_input(self) -> str: char = sys.stdin.read(1) # Check for escape sequences (arrow keys) - if char == '\x1b': # ESC + if char == "\x1b": # ESC next_char = sys.stdin.read(1) - if next_char == '[': + if next_char == "[": arrow = sys.stdin.read(1) - if arrow == 'A': - return 'up' - elif arrow == 'B': - return 'down' - return '' + if arrow == "A": + return "up" + elif arrow == "B": + return "down" + return "" # Backspace handling - if char == '\x7f': # Backspace (DEL) - return 'back' - elif char == 
'\x08': # Backspace (BS) - return 'back' + if char == "\x7f": # Backspace (DEL) + return "back" + elif char == "\x08": # Backspace (BS) + return "back" # Enter key - if char in ['\r', '\n']: - return 'enter' + if char in ["\r", "\n"]: + return "enter" # Digit keys elif char.isdigit() and 1 <= int(char) <= 6: return char - elif char == '\x03': # Ctrl+C + elif char == "\x03": # Ctrl+C raise KeyboardInterrupt - return '' + return "" except Exception: return input("āž¤ ").strip().lower() finally: diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 546d6dd63..bfdcdbcde 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -68,7 +68,9 @@ def _display_local_results(results_data): result = evaluator_result.get("result", {}) score = result.get("score", 0.0) eval_time = result.get("evaluationTime", 0.0) - console.info(f" └─ {evaluator_name}: {score:.1f}% ({eval_time*1000:.2f}ms)") + console.info( + f" └─ {evaluator_name}: {score:.1f}% ({eval_time * 1000:.2f}ms)" + ) console.info(f"\nšŸŽÆ Summary: {int(passed_count)}/{total_count} tests passed") if overall_score == 100.0: @@ -140,6 +142,7 @@ def eval( if interactive: try: from ._interactive import launch_interactive_cli + launch_interactive_cli() return except ImportError as e:
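
A minimal usage sketch of the new entry point added by this series. It assumes that `_interactive/__init__.py` re-exports `launch_interactive_cli` (implied by the `from ._interactive import launch_interactive_cli` hunk in cli_eval.py but not shown in this diff), that the `interactive` parameter of `uipath eval` is exposed as an `--interactive` flag, and it uses the `samples/calculator` project from this patch purely for illustration:

    # Sketch only: launch the interactive eval builder programmatically,
    # equivalent to running `uipath eval --interactive` (flag name assumed)
    # from inside the project directory.
    from pathlib import Path

    # Import path assumed from the cli_eval.py hunk above; requires the
    # _interactive package to re-export launch_interactive_cli.
    from uipath._cli._interactive import launch_interactive_cli

    # Point it at an agent project that contains evaluationSets/ and
    # evaluators/ directories, e.g. the calculator sample in this patch.
    launch_interactive_cli(project_root=Path("samples/calculator"))

On a terminal with termios support this starts the arrow-key navigation mode; otherwise it falls back to the numbered text menu.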