From e7dff17b2c1bb8c50ec36e4e07162553894304c3 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 17:55:18 -0700 Subject: [PATCH 1/6] fix: adding interactive mode for eval --- .../comprehensive_calculator_tests.json | 118 ++ .../calculator/evaluators/exact_match.json | 10 + .../evaluators/json_similarity.json | 10 + src/uipath/_cli/_eval_interactive.py | 1199 +++++++++++++++++ src/uipath/_cli/_utils/_eval_set.py | 15 +- src/uipath/_cli/cli_eval.py | 77 ++ 6 files changed, 1424 insertions(+), 5 deletions(-) create mode 100644 samples/calculator/evaluationSets/comprehensive_calculator_tests.json create mode 100644 samples/calculator/evaluators/exact_match.json create mode 100644 samples/calculator/evaluators/json_similarity.json create mode 100644 src/uipath/_cli/_eval_interactive.py diff --git a/samples/calculator/evaluationSets/comprehensive_calculator_tests.json b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json new file mode 100644 index 000000000..f8c941cb2 --- /dev/null +++ b/samples/calculator/evaluationSets/comprehensive_calculator_tests.json @@ -0,0 +1,118 @@ +{ + "id": "calc-comprehensive-001", + "fileName": "comprehensive_eval_set.json", + "evaluatorRefs": ["exact-match-eval", "json-similarity-eval"], + "name": "Comprehensive Calculator Tests", + "batchSize": 10, + "timeoutMinutes": 10, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": [ + { + "id": "add-basic", + "name": "Basic Addition", + "inputs": { + "a": 5, + "b": 3, + "operator": "+" + }, + "expectedOutput": { + "result": 8.0 + }, + "expectedAgentBehavior": "Add two positive numbers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "sub-basic", + "name": "Basic Subtraction", + "inputs": { + "a": 10, + "b": 4, + "operator": "-" + }, + "expectedOutput": { + "result": 6.0 + }, + "expectedAgentBehavior": "Subtract smaller from larger", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "mul-basic", + "name": "Basic Multiplication", + "inputs": { + "a": 7, + "b": 6, + "operator": "*" + }, + "expectedOutput": { + "result": 42.0 + }, + "expectedAgentBehavior": "Multiply two integers", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-basic", + "name": "Basic Division", + "inputs": { + "a": 15, + "b": 3, + "operator": "/" + }, + "expectedOutput": { + "result": 5.0 + }, + "expectedAgentBehavior": "Divide evenly", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + }, + { + "id": "div-zero", + "name": "Division by Zero", + "inputs": { + "a": 10, + "b": 0, + "operator": "/" + }, + "expectedOutput": { + "result": 0.0 + }, + 
"expectedAgentBehavior": "Handle division by zero", + "simulationInstructions": "", + "simulateInput": false, + "inputGenerationInstructions": "", + "simulateTools": false, + "toolsToSimulate": [], + "evalSetId": "calc-comprehensive-001", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + ] +} \ No newline at end of file diff --git a/samples/calculator/evaluators/exact_match.json b/samples/calculator/evaluators/exact_match.json new file mode 100644 index 000000000..4750fc819 --- /dev/null +++ b/samples/calculator/evaluators/exact_match.json @@ -0,0 +1,10 @@ +{ + "id": "exact-match-eval", + "name": "Exact Match Evaluator", + "description": "Tests for exact output matches", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/samples/calculator/evaluators/json_similarity.json b/samples/calculator/evaluators/json_similarity.json new file mode 100644 index 000000000..b1fac450e --- /dev/null +++ b/samples/calculator/evaluators/json_similarity.json @@ -0,0 +1,10 @@ +{ + "id": "json-similarity-eval", + "name": "JSON Similarity Evaluator", + "description": "Tests for structural JSON similarity with tolerance", + "category": 0, + "type": 6, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" +} diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py new file mode 100644 index 000000000..bb2872ab5 --- /dev/null +++ b/src/uipath/_cli/_eval_interactive.py @@ -0,0 +1,1199 @@ +"""Simple interactive CLI for evaluations - keyboard only, no mouse.""" + +import json +import subprocess +import sys +from pathlib import Path +from typing import List, Optional, Tuple + +import select +import sys +import termios +import tty + +def has_termios() -> bool: + """Check if we have termios support for advanced input.""" + try: + termios.tcgetattr(sys.stdin) + return True + except: + return False + +HAS_NAVIGATION = has_termios() + +from ._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +class InteractiveEvalCLI: + """Simple, fast, keyboard-driven evaluation CLI.""" + + def __init__(self, project_root: Path = None): + self.project_root = project_root or Path.cwd() + self.eval_sets: List[Tuple[str, Path]] = [] + self.evaluators: List[Tuple[str, Path]] = [] + self.current_selection = 0 + self.menu_items = [ + "šŸ“‹ List eval sets", + "āš™ļø List evaluators", + "⚔ Quick run (auto-select)", + "āž• Create eval set", + "āž• Create evaluator", + "šŸŽÆ Run specific combination" + ] + self._discover_files() + + def _show_ascii_art(self): + """Display ASCII art banner.""" + art = """ + ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ + ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• + + Evaluation Builder + Interactive Evaluation Toolkit + """ + console.info(art) + + def _discover_files(self) -> None: + """Quickly discover eval sets and evaluators.""" 
+ # Clear existing lists to avoid duplicates + self.eval_sets.clear() + self.evaluators.clear() + + # Find eval sets from evaluationSets folder + eval_sets_dir = self.project_root / "evaluationSets" + if eval_sets_dir.exists(): + for eval_file in eval_sets_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Check if it's an eval set by presence of "evaluations" array + if "evaluations" in data and isinstance(data.get("evaluations"), list): + name = data.get("name", eval_file.stem) + self.eval_sets.append((name, eval_file)) + except: + pass + + # Find evaluators from evaluators folder + evaluators_dir = self.project_root / "evaluators" + if evaluators_dir.exists(): + for eval_file in evaluators_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Verify it has evaluator-specific fields + if "id" in data and "type" in data: + name = data.get("name", eval_file.stem) + self.evaluators.append((name, eval_file)) + except: + pass + + def run(self) -> None: + """Run the interactive CLI.""" + self._show_ascii_art() + + if HAS_NAVIGATION: + self._run_with_navigation() + else: + self._run_basic() + + def _run_with_navigation(self) -> None: + """Run with arrow key navigation.""" + while True: + try: + self._clear_screen() + self._show_status() + self._show_navigable_menu() + + # Get key input + key = self._get_key_input() + + if key in ['q', 'Q']: + console.info("šŸ‘‹ Goodbye!") + break + elif key == 'up': + self.current_selection = (self.current_selection - 1) % len(self.menu_items) + elif key == 'down': + self.current_selection = (self.current_selection + 1) % len(self.menu_items) + elif key in ['enter', ' ']: + self._execute_menu_item_with_navigation(self.current_selection) + elif key.isdigit() and 1 <= int(key) <= len(self.menu_items): + self.current_selection = int(key) - 1 + self._execute_menu_item_with_navigation(self.current_selection) + + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + except Exception as e: + console.error(f"Error: {e}") + self._get_input("\nPress Enter to continue...") + + def _run_basic(self) -> None: + """Run basic mode without arrow keys.""" + while True: + try: + self._show_status() + self._show_main_menu() + choice = self._get_input("\nChoice (1-6, q to quit): ").strip().lower() + + if choice == 'q': + console.info("šŸ‘‹ Goodbye!") + break + elif choice.isdigit() and 1 <= int(choice) <= len(self.menu_items): + self._execute_menu_item(int(choice) - 1) + else: + console.warning("Invalid choice. Try again.") + + if choice in ['1', '2']: + self._get_input("\nPress Enter to continue...") + + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + except Exception as e: + console.error(f"Error: {e}") + + def _clear_screen(self) -> None: + """Clear the screen.""" + import os + os.system('cls' if os.name == 'nt' else 'clear') + self._show_ascii_art() + + def _show_status(self) -> None: + """Show project status.""" + console.info(f"šŸ“ Project: {self.project_root.name}") + console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") + console.info("─" * 65) + + def _show_navigable_menu(self) -> None: + """Show menu with current selection highlighted.""" + console.info("\nāŒØļø Navigation: ↑↓ to navigate, Enter/Space to select, 1-6 for direct, q to quit, Backspace to go back") + console.info("─" * 65) + + for i, item in enumerate(self.menu_items): + if i == self.current_selection: + console.info(f"ā–ŗ {i+1}. 
{item} ā—„") + else: + console.info(f" {i+1}. {item}") + + def _get_key_input(self) -> str: + """Get key input with arrow key support.""" + if not HAS_NAVIGATION: + return input("āž¤ ").strip().lower() + + try: + # Set terminal to raw mode + old_settings = termios.tcgetattr(sys.stdin) + tty.setraw(sys.stdin) + + char = sys.stdin.read(1) + + # Handle escape sequences (arrow keys) + if char == '\x1b': # ESC + char += sys.stdin.read(2) + if char == '\x1b[A': # Up arrow + return 'up' + elif char == '\x1b[B': # Down arrow + return 'down' + elif char == '\x1b[C': # Right arrow + return 'enter' + elif char == '\x1b[D': # Left arrow + return 'up' + elif char == '\r' or char == '\n': # Enter + return 'enter' + elif char == ' ': # Space + return 'enter' + elif char in ['q', 'Q']: + return 'q' + elif char == '\x7f': # Backspace (DEL) + return 'back' + elif char == '\x08': # Backspace (BS) + return 'back' + elif char.isdigit() and 1 <= int(char) <= 6: + return char + elif char == '\x03': # Ctrl+C + raise KeyboardInterrupt + + return '' + except: + return input("āž¤ ").strip().lower() + finally: + # Restore terminal settings + try: + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) + except: + pass + + def _execute_menu_item_with_navigation(self, index: int) -> None: + """Execute menu item with navigation support.""" + if index == 0: + self._drill_down_eval_sets() + elif index == 1: + self._drill_down_evaluators() + elif index == 2: + self._quick_run_no_clear() + elif index == 3: + self._create_eval_set_interactive() + elif index == 4: + self._create_evaluator_interactive() + elif index == 5: + self._run_specific_navigation() + + def _execute_menu_item(self, index: int) -> None: + """Execute selected menu item (basic mode).""" + if index == 0: + self._list_eval_sets() + elif index == 1: + self._list_evaluators() + elif index == 2: + self._quick_run() + elif index == 3: + self._create_eval_set() + elif index == 4: + self._create_evaluator() + elif index == 5: + self._run_specific() + + if index in [0, 1]: + self._get_input("\nPress Enter to continue...") + + def _show_main_menu(self) -> None: + """Show main menu options.""" + console.info(f"\nšŸ“ Project: {self.project_root.name}") + console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") + console.info("\n" + "─" * 50) + console.info("1. šŸ“‹ List eval sets") + console.info("2. āš™ļø List evaluators") + console.info("3. ⚔ Quick run (auto-select)") + console.info("4. āž• Create eval set") + console.info("5. āž• Create evaluator") + console.info("6. šŸŽÆ Run specific combination") + + def _list_eval_sets(self) -> None: + """List available evaluation sets.""" + console.info("\nšŸ“‹ Available Eval Sets:") + if not self.eval_sets: + console.warning("No eval sets found") + return + + for i, (name, path) in enumerate(self.eval_sets, 1): + # Load test count + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f"{i}. {name}") + console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") + console.info(f" File: {path.name}") + except: + console.info(f"{i}. 
{name} (error loading)") + + def _list_evaluators(self) -> None: + """List available evaluators.""" + console.info("\nāš™ļø Available Evaluators:") + if not self.evaluators: + console.warning("No evaluators found") + return + + for i, (name, path) in enumerate(self.evaluators, 1): + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f"{i}. {name}") + console.info(f" Type: {category} | {type_name}") + console.info(f" File: {path.name}") + except: + console.info(f"{i}. {name} (error loading)") + + def _list_eval_sets_navigation(self) -> None: + """List eval sets with navigation.""" + self._clear_screen() + console.info("šŸ“‹ Available Eval Sets") + console.info("─" * 65) + self._list_eval_sets() + console.info("\nāŒØļø Press any key to go back...") + self._get_key_input() + + def _list_evaluators_navigation(self) -> None: + """List evaluators with navigation.""" + self._clear_screen() + console.info("āš™ļø Available Evaluators") + console.info("─" * 65) + self._list_evaluators() + console.info("\nāŒØļø Press any key to go back...") + self._get_key_input() + + def _quick_run(self) -> None: + """Quick run with auto-selection.""" + if not self.eval_sets: + console.error("No eval sets found!") + return + + if not self.evaluators: + console.error("No evaluators found!") + return + + console.info("\n⚔ Quick Run:") + + # Auto-select first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"šŸ“‹ Using eval set: {eval_name}") + + # Auto-select all evaluators + console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _quick_run_no_clear(self) -> None: + """Quick run without clearing screen.""" + if not self.eval_sets: + console.error("No eval sets found!") + input("\nPress Enter to continue...") + return + + if not self.evaluators: + console.error("No evaluators found!") + input("\nPress Enter to continue...") + return + + console.info("\n⚔ Quick Run:") + + # Auto-select first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"šŸ“‹ Using eval set: {eval_name}") + + # Auto-select all evaluators + console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + + def _run_specific(self) -> None: + """Run with specific selection.""" + if not self.eval_sets or not self.evaluators: + console.error("Need both eval sets and evaluators!") + return + + # Select eval set with navigation + eval_choice = self._select_from_list(self.eval_sets, "Eval Set") + if eval_choice is None: + return + + eval_name, eval_path = self.eval_sets[eval_choice - 1] + console.success(f"Selected: {eval_name}") + + # Confirm and run + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _run_specific_navigation(self) -> None: + """Run specific combination with navigation.""" + if not self.eval_sets or not self.evaluators: + console.error("Need both eval sets and evaluators!") + input("\nPress Enter to continue...") + return + + # Select eval set + self._clear_screen() + console.info("šŸŽÆ Select Evaluation Set") + console.info("─" * 65) + self._list_eval_sets() + + choice = input("\nāž¤ Select eval set number (or q to cancel): ").strip() + if choice.lower() == 'q': + return + + try: + eval_choice = int(choice) + if 1 <= eval_choice <= 
len(self.eval_sets): + eval_name, eval_path = self.eval_sets[eval_choice - 1] + console.success(f"Selected: {eval_name}") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + except ValueError: + console.error("Invalid selection") + input("\nPress Enter to continue...") + + def _execute_evaluation(self, eval_path: Path) -> None: + """Execute evaluation with live results.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + def _execute_evaluation_no_clear(self, eval_path: Path) -> None: + """Execute evaluation without clearing screen.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + input("\nPress Enter to continue...") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + input("\nPress Enter to continue...") + + def _find_main_py(self) -> Optional[Path]: + """Find main.py file.""" + # Check current directory + main_py = self.project_root / "main.py" + if main_py.exists(): + return main_py + + # Check parent directories + for parent in self.project_root.parents: + main_py = parent / "main.py" + if main_py.exists(): + return main_py + + return None + + def _get_input(self, prompt: str) -> str: + """Get user input with prompt.""" + try: + return input(f"āž¤ {prompt}") + except KeyboardInterrupt: + raise + + def _select_from_list(self, items: List[Tuple[str, Path]], title: str) -> Optional[int]: + """Interactive list selection.""" + if not items: + console.warning(f"No {title.lower()} found") + return None + + 
console.info(f"\n{title}:") + for i, (name, _) in enumerate(items, 1): + console.info(f"{i}. {name}") + + try: + value = input(f"āž¤ {title} number: ") + num = int(value) + if 1 <= num <= len(items): + return num + else: + console.warning(f"Please enter a number between 1 and {len(items)}") + return None + except (ValueError, KeyboardInterrupt): + return None + + def _confirm(self, message: str) -> bool: + """Get yes/no confirmation.""" + response = self._get_input(f"{message} (y/n): ").lower() + return response in ['y', 'yes'] + + def _get_category_name(self, category: int) -> str: + """Get category name.""" + names = {0: "Deterministic", 1: "LLM Judge", 2: "Agent Scorer", 3: "Trajectory"} + return names.get(category, "Unknown") + + def _get_type_name(self, eval_type: int) -> str: + """Get type name.""" + names = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + return names.get(eval_type, "Unknown") + + def _drill_down_eval_sets(self) -> None: + """Drill down into eval sets with navigation.""" + if not self.eval_sets: + self._show_no_items_screen("eval sets") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("šŸ“‹ Eval Sets - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.eval_sets): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_eval_set_preview(path) + else: + console.info(f" {i+1}. {name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.eval_sets) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.eval_sets) + elif key in ['enter', ' ']: + self._show_eval_set_details(self.eval_sets[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): + current_selection = int(key) - 1 + + def _drill_down_evaluators(self) -> None: + """Drill down into evaluators with navigation.""" + if not self.evaluators: + self._show_no_items_screen("evaluators") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("āš™ļø Evaluators - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.evaluators): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_evaluator_preview(path) + else: + console.info(f" {i+1}. 
{name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.evaluators) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.evaluators) + elif key in ['enter', ' ']: + self._show_evaluator_details(self.evaluators[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): + current_selection = int(key) - 1 + + def _show_no_items_screen(self, item_type: str) -> None: + """Show no items screen.""" + self._clear_screen() + console.warning(f"No {item_type} found!") + console.info(f"Press Enter to go back...") + self._get_input("") + + def _show_eval_set_preview(self, path: Path) -> None: + """Show eval set preview info.""" + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") + except: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_evaluator_preview(self, path: Path) -> None: + """Show evaluator preview info.""" + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸŽÆ Type: {category} | {type_name}") + except: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: + """Show detailed eval set view.""" + name, path = eval_set_tuple + self._clear_screen() + console.info(f"šŸ“‹ Eval Set Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"šŸ“„ File: {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") + console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") + console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") + console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") + + evaluator_refs = data.get('evaluatorRefs', []) + if evaluator_refs: + console.info(f"\nšŸŽÆ Evaluator References:") + for ref in evaluator_refs: + console.info(f" • {ref}") + + evaluations = data.get('evaluations', []) + if evaluations: + console.info(f"\nšŸ“ Test Cases:") + for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 + test_name = eval_data.get('name', f'Test {i}') + console.info(f" {i}. {test_name}") + if 'inputs' in eval_data: + inputs_preview = str(eval_data['inputs'])[:60] + if len(str(eval_data['inputs'])) > 60: + inputs_preview += "..." + console.info(f" Input: {inputs_preview}") + if 'expectedOutput' in eval_data: + output_preview = str(eval_data['expectedOutput'])[:60] + if len(str(eval_data['expectedOutput'])) > 60: + output_preview += "..." + console.info(f" Expected: {output_preview}") + + if len(evaluations) > 10: + console.info(f" ... 
and {len(evaluations) - 10} more tests") + + except Exception as e: + console.error(f"Error loading eval set: {e}") + + console.info("\nāŒØļø Press q/Backspace to go back...") + while True: + key = self._get_key_input() + if key in ['q', 'Q', 'back']: + break + + def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: + """Show detailed evaluator view.""" + name, path = evaluator_tuple + self._clear_screen() + console.info(f"āš™ļø Evaluator Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"šŸ“„ File: {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“ Description: {data.get('description', 'No description')}") + console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") + console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") + + if 'llmConfig' in data: + llm_config = data['llmConfig'] + console.info(f"\nšŸ¤– LLM Configuration:") + console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") + if 'prompt' in llm_config: + prompt_preview = llm_config['prompt'][:100] + if len(llm_config['prompt']) > 100: + prompt_preview += "..." + console.info(f" Prompt: {prompt_preview}") + + except Exception as e: + console.error(f"Error loading evaluator: {e}") + + console.info("\nāŒØļø Press q/Backspace to go back...") + while True: + key = self._get_key_input() + if key in ['q', 'Q', 'back']: + break + + def _create_eval_set(self) -> None: + """Create new evaluation set interactively.""" + console.info("\nāž• Create New Eval Set") + + name = self._get_input("Name: ") + if not name: + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Create basic eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": [], + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": [] + } + + # Ask if they want to add evaluations + add_evals = self._get_input("Add evaluations now? (y/n): ").lower() + if add_evals in ['y', 'yes']: + eval_set["evaluations"] = self._add_evaluations_interactive(eval_set["id"]) + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"āœ… Created eval set: {filename}") + self._discover_files() # Refresh + + def _create_eval_set_interactive(self) -> None: + """Create new evaluation set with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Eval Set - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Eval Set Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Evaluator References + console.info("\nšŸŽÆ Evaluator References") + console.info("Available evaluators:") + for i, (eval_name, _) in enumerate(self.evaluators, 1): + console.info(f" {i}. 
{eval_name}") + + evaluator_refs = [] + if self.evaluators: + refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() + if refs_input.lower() == 'all': + evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + elif refs_input: + try: + for num in refs_input.split(','): + idx = int(num.strip()) - 1 + if 0 <= idx < len(self.evaluators): + eval_path = self.evaluators[idx][1] + eval_id = self._get_evaluator_id(eval_path) + evaluator_refs.append(eval_id) + except ValueError: + console.warning("Invalid input, no evaluators selected") + + # Test Cases + console.info("\nšŸ“ Test Cases") + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": f"eval-{len(self.eval_sets) + 1}", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + evaluations.append(evaluation) + test_count += 1 + + if not evaluations: + console.warning("At least one test case is required!") + input("Press Enter to continue...") + return + + # Create eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": evaluator_refs, + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z", + "evaluations": evaluations + } + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"\nāœ… Created eval set: {filename}") + console.info(f"šŸ“Š Tests: {len(evaluations)}") + console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create eval set: {e}") + + input("\nPress Enter to continue...") + + def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: + """Add evaluations interactively.""" + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = self._get_input("Test Name (or 'done' to finish): ") + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # 
Simple inputs + console.info("Inputs (JSON format, e.g., {\"a\": 5, \"b\": 3})") + inputs_str = self._get_input("Inputs: ") + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected output + console.info("Expected Output (JSON format)") + expected_str = self._get_input("Expected Output: ") + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": eval_set_id, + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + evaluations.append(evaluation) + test_count += 1 + + return evaluations + + def _create_evaluator(self) -> None: + """Create new evaluator interactively.""" + console.info("\nāž• Create New Evaluator") + + # Select template + console.info("Templates:") + console.info("1. Exact Match") + console.info("2. JSON Similarity") + + template = self._get_number_input("Template (1-2): ", 1, 2) + if template is None: + return + + name = self._get_input("Name: ") + if not name: + return + + # Template configurations + if template == 1: + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": "Exact match evaluator", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + else: # JSON Similarity + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": "JSON similarity evaluator", + "category": 0, + "type": 6, + "targetOutputKey": "*", + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"āœ… Created evaluator: {filename}") + self._discover_files() # Refresh + + def _create_evaluator_interactive(self) -> None: + """Create new evaluator with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Evaluator - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Evaluator Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + description = input("āž¤ Description: ").strip() or f"{name} evaluator" + + # Category Selection + console.info("\nšŸ·ļø Category Selection") + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + + for key, value in categories.items(): + console.info(f" {key}. 
{value}") + + try: + category = int(input("āž¤ Select Category (0-3): ") or "0") + if category not in categories: + category = 0 + except ValueError: + category = 0 + + # Type Selection + console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") + types = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + + # Show relevant types based on category + relevant_types = [] + if category == 0: # Deterministic + relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + elif category == 1: # LLM as Judge + relevant_types = [4, 5] # Factuality, Custom + elif category == 3: # Trajectory + relevant_types = [7] # Trajectory + else: + relevant_types = list(types.keys()) + + for type_id in relevant_types: + console.info(f" {type_id}. {types[type_id]}") + + try: + eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + if eval_type not in relevant_types: + eval_type = relevant_types[0] + except (ValueError, IndexError): + eval_type = 1 + + # Target Output Key + console.info(f"\nšŸ” Target Configuration") + console.info("Target Output Key determines which part of the output to evaluate") + console.info("Examples: '*' (all), 'result', 'answer', 'output'") + target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": description, + "category": category, + "type": eval_type, + "targetOutputKey": target_key, + "createdAt": "2025-01-25T00:00:00Z", + "updatedAt": "2025-01-25T00:00:00Z" + } + + # LLM Configuration (if LLM as Judge) + if category == 1: # LLM as Judge + console.info(f"\nšŸ¤– LLM Configuration") + model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" + + console.info("šŸ“ Evaluation Prompt") + console.info("This prompt will be used to evaluate the agent's output") + prompt = input("āž¤ Evaluation Prompt: ").strip() + + if prompt: + evaluator["llmConfig"] = { + "modelName": model_name, + "prompt": prompt, + "temperature": 0.0, + "maxTokens": 1000 + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"\nāœ… Created evaluator: {filename}") + console.info(f"šŸ·ļø Category: {categories[category]}") + console.info(f"šŸŽÆ Type: {types[eval_type]}") + console.info(f"šŸ” Target: {target_key}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create evaluator: {e}") + + input("\nPress Enter to continue...") + + def _get_number_input(self, prompt: str, min_val: int, max_val: int) -> Optional[int]: + """Get number input with validation.""" + try: + value = input(f"āž¤ {prompt}") + num = int(value) + if min_val <= num <= max_val: + return num + else: + console.warning(f"Please enter a number between {min_val} and {max_val}") + return None + except (ValueError, KeyboardInterrupt): + return None + + def _get_evaluator_id(self, path: Path) -> str: + """Get evaluator ID from file.""" + try: + with open(path) as f: + data = json.load(f) + return data.get("id", path.stem) + except: + return path.stem + + +def launch_interactive_cli(project_root: 
Path = None) -> None: + """Launch the interactive CLI.""" + cli = InteractiveEvalCLI(project_root) + cli.run() diff --git a/src/uipath/_cli/_utils/_eval_set.py b/src/uipath/_cli/_utils/_eval_set.py index 9e95d0c71..53d55e216 100644 --- a/src/uipath/_cli/_utils/_eval_set.py +++ b/src/uipath/_cli/_utils/_eval_set.py @@ -13,7 +13,7 @@ class EvalHelpers: @staticmethod def auto_discover_eval_set() -> str: - """Auto-discover evaluation set from evals/eval-sets directory. + """Auto-discover evaluation set from evaluationSets or evals/eval-sets directory. Returns: Path to the evaluation set file @@ -21,19 +21,24 @@ def auto_discover_eval_set() -> str: Raises: ValueError: If no eval set found or multiple eval sets exist """ - eval_sets_dir = Path("evals/eval-sets") + # Try evaluationSets folder first (new structure) + eval_sets_dir = Path("evaluationSets") + + # Fall back to evals/eval-sets (old structure) + if not eval_sets_dir.exists(): + eval_sets_dir = Path("evals/eval-sets") if not eval_sets_dir.exists(): raise ValueError( - "No 'evals/eval-sets' directory found. " - "Please set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." + "No 'evaluationSets' or 'evals/eval-sets' directory found. " + "Please create an evaluation set or set 'UIPATH_PROJECT_ID' env var and run 'uipath pull'." ) eval_set_files = list(eval_sets_dir.glob("*.json")) if not eval_set_files: raise ValueError( - "No evaluation set files found in 'evals/eval-sets' directory. " + f"No evaluation set files found in '{eval_sets_dir}' directory. " ) if len(eval_set_files) > 1: diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 53dd3bc12..70debd662 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -31,6 +31,55 @@ console = ConsoleLogger() +def _display_local_results(results_data): + """Display evaluation results locally in a formatted way.""" + if not results_data: + return + + evaluation_set_name = results_data.get("evaluationSetName", "Unknown") + overall_score = results_data.get("score", 0.0) + evaluation_results = results_data.get("evaluationSetResults", []) + + console.info(f"\nšŸŽÆ Evaluation Report: {evaluation_set_name}") + console.info(f"šŸ“Š Overall Score: {overall_score:.1f}%") + console.info("=" * 60) + + passed_count = 0 + total_count = len(evaluation_results) + + for i, test in enumerate(evaluation_results, 1): + test_score = test.get("score", 0.0) + test_name = test.get("evaluationName", f"Test {i}") + + if test_score == 100.0: + status = "āœ… PASS" + passed_count += 1 + elif test_score == 0.0: + status = "āŒ FAIL" + else: + status = "āš ļø PARTIAL" + passed_count += 0.5 # Partial credit + + console.info(f"\n{i}. 
{test_name}: {status} ({test_score:.1f}%)") + + evaluator_results = test.get("evaluationRunResults", []) + for evaluator_result in evaluator_results: + evaluator_name = evaluator_result.get("evaluatorName", "Unknown Evaluator") + result = evaluator_result.get("result", {}) + score = result.get("score", 0.0) + eval_time = result.get("evaluationTime", 0.0) + console.info(f" └─ {evaluator_name}: {score:.1f}% ({eval_time*1000:.2f}ms)") + + console.info(f"\nšŸŽÆ Summary: {int(passed_count)}/{total_count} tests passed") + if overall_score == 100.0: + console.success("šŸŽ‰ All tests passed!") + elif overall_score == 0.0: + console.info("šŸ’„ All tests failed!") + else: + console.info(f"⚔ Partial success: {overall_score:.1f}% overall score") + console.info("") + + class LiteralOption(click.Option): def type_cast_value(self, ctx, value): try: @@ -61,6 +110,12 @@ def type_cast_value(self, ctx, value): type=click.Path(exists=False), help="File path where the output will be written", ) +@click.option( + "--interactive", + is_flag=True, + help="Launch streamlined keyboard-only interactive CLI", + default=False, +) @track(when=lambda *_a, **_kw: os.getenv(ENV_JOB_ID) is None) def eval( entrypoint: Optional[str], @@ -69,6 +124,7 @@ def eval( no_report: bool, workers: int, output_file: Optional[str], + interactive: bool, ) -> None: """Run an evaluation set against the agent. @@ -78,7 +134,20 @@ def eval( eval_ids: Optional list of evaluation IDs workers: Number of parallel workers for running evaluations no_report: Do not report the evaluation results + interactive: Launch streamlined keyboard-only interactive CLI """ + # Handle interactive mode + if interactive: + try: + from ._eval_interactive import launch_interactive_cli + launch_interactive_cli() + return + except ImportError as e: + console.error(f"Interactive mode requires additional dependencies: {e}") + return + except Exception as e: + console.error(f"Failed to launch interactive mode: {e}") + return if not no_report and not os.getenv("UIPATH_FOLDER_KEY"): os.environ["UIPATH_FOLDER_KEY"] = asyncio.run( get_personal_workspace_key_async() @@ -131,16 +200,24 @@ def generate_runtime_context(**context_kwargs) -> UiPathRuntimeContext: if eval_context.job_id: runtime_factory.add_span_exporter(LlmOpsHttpExporter()) + eval_runtime_ref = None + async def execute(): + nonlocal eval_runtime_ref async with UiPathEvalRuntime.from_eval_context( factory=runtime_factory, context=eval_context, event_bus=event_bus, ) as eval_runtime: + eval_runtime_ref = eval_runtime await eval_runtime.execute() await event_bus.wait_for_all(timeout=10) asyncio.run(execute()) + + # Display results locally when --no-report is used + if no_report and eval_runtime_ref and eval_runtime_ref.context.result: + _display_local_results(eval_runtime_ref.context.result.output) except Exception as e: console.error( f"Error: Unexpected error occurred - {str(e)}", include_traceback=True From 03875e1f12685350f400b05ef94b895401e67107 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:05:44 -0700 Subject: [PATCH 2/6] fix: mypy type errors in interactive eval mode - Add Optional type hints for Path parameters - Add Dict and Any imports for type annotations - Add null checks for process.stdout before iteration - Add type annotations for evaluation dictionaries - Fix return type for _add_evaluations_interactive --- src/uipath/_cli/_eval_interactive.py | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git 
a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index bb2872ab5..c77c868e5 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ b/src/uipath/_cli/_eval_interactive.py @@ -4,7 +4,7 @@ import subprocess import sys from pathlib import Path -from typing import List, Optional, Tuple +from typing import Any, Dict, List, Optional, Tuple import select import sys @@ -29,7 +29,7 @@ def has_termios() -> bool: class InteractiveEvalCLI: """Simple, fast, keyboard-driven evaluation CLI.""" - def __init__(self, project_root: Path = None): + def __init__(self, project_root: Optional[Path] = None): self.project_root = project_root or Path.cwd() self.eval_sets: List[Tuple[str, Path]] = [] self.evaluators: List[Tuple[str, Path]] = [] @@ -457,8 +457,9 @@ def _execute_evaluation(self, eval_path: Path) -> None: ) # Stream output in real-time - for line in process.stdout: - print(line.rstrip()) + if process.stdout: + for line in process.stdout: + print(line.rstrip()) process.wait() @@ -504,8 +505,9 @@ def _execute_evaluation_no_clear(self, eval_path: Path) -> None: ) # Stream output in real-time - for line in process.stdout: - print(line.rstrip()) + if process.stdout: + for line in process.stdout: + print(line.rstrip()) process.wait() @@ -795,7 +797,7 @@ def _create_eval_set(self) -> None: # Ask if they want to add evaluations add_evals = self._get_input("Add evaluations now? (y/n): ").lower() if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(eval_set["id"]) + eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) # Ensure evaluationSets directory exists eval_sets_dir = self.project_root / "evaluationSets" @@ -883,7 +885,7 @@ def _create_eval_set_interactive(self) -> None: console.warning("Invalid JSON, using empty expected output") expected_output = {} - evaluation = { + evaluation: Dict[str, Any] = { "id": f"test-{test_count}", "name": test_name, "inputs": inputs, @@ -941,7 +943,7 @@ def _create_eval_set_interactive(self) -> None: input("\nPress Enter to continue...") - def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: + def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]]: """Add evaluations interactively.""" evaluations = [] test_count = 1 @@ -974,7 +976,7 @@ def _add_evaluations_interactive(self, eval_set_id: str) -> List[dict]: console.warning("Invalid JSON, using empty expected output") expected_output = {} - evaluation = { + evaluation: Dict[str, Any] = { "id": f"test-{test_count}", "name": test_name, "inputs": inputs, @@ -1193,7 +1195,7 @@ def _get_evaluator_id(self, path: Path) -> str: return path.stem -def launch_interactive_cli(project_root: Path = None) -> None: +def launch_interactive_cli(project_root: Optional[Path] = None) -> None: """Launch the interactive CLI.""" cli = InteractiveEvalCLI(project_root) cli.run() From f7dbc09112780b779a2e8ccce4510db53e710e2a Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:10:34 -0700 Subject: [PATCH 3/6] fix: ruff linting errors in interactive eval mode - Move imports to top of file - Replace all bare except clauses with Exception - Remove unused f-string prefixes - Fix import organization --- src/uipath/_cli/_eval_interactive.py | 44 +++++++++++++--------------- 1 file changed, 21 insertions(+), 23 deletions(-) diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index c77c868e5..02c670c39 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ 
b/src/uipath/_cli/_eval_interactive.py @@ -3,26 +3,24 @@ import json import subprocess import sys +import termios +import tty from pathlib import Path from typing import Any, Dict, List, Optional, Tuple -import select -import sys -import termios -import tty +from ._utils._console import ConsoleLogger + def has_termios() -> bool: """Check if we have termios support for advanced input.""" try: termios.tcgetattr(sys.stdin) return True - except: + except Exception: return False -HAS_NAVIGATION = has_termios() - -from ._utils._console import ConsoleLogger +HAS_NAVIGATION = has_termios() console = ConsoleLogger() @@ -76,7 +74,7 @@ def _discover_files(self) -> None: if "evaluations" in data and isinstance(data.get("evaluations"), list): name = data.get("name", eval_file.stem) self.eval_sets.append((name, eval_file)) - except: + except Exception: pass # Find evaluators from evaluators folder @@ -90,7 +88,7 @@ def _discover_files(self) -> None: if "id" in data and "type" in data: name = data.get("name", eval_file.stem) self.evaluators.append((name, eval_file)) - except: + except Exception: pass def run(self) -> None: @@ -220,13 +218,13 @@ def _get_key_input(self) -> str: raise KeyboardInterrupt return '' - except: + except Exception: return input("āž¤ ").strip().lower() finally: # Restore terminal settings try: termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - except: + except Exception: pass def _execute_menu_item_with_navigation(self, index: int) -> None: @@ -291,7 +289,7 @@ def _list_eval_sets(self) -> None: console.info(f"{i}. {name}") console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") console.info(f" File: {path.name}") - except: + except Exception: console.info(f"{i}. {name} (error loading)") def _list_evaluators(self) -> None: @@ -310,7 +308,7 @@ def _list_evaluators(self) -> None: console.info(f"{i}. {name}") console.info(f" Type: {category} | {type_name}") console.info(f" File: {path.name}") - except: + except Exception: console.info(f"{i}. 
{name} (error loading)") def _list_eval_sets_navigation(self) -> None: @@ -652,7 +650,7 @@ def _show_no_items_screen(self, item_type: str) -> None: """Show no items screen.""" self._clear_screen() console.warning(f"No {item_type} found!") - console.info(f"Press Enter to go back...") + console.info("Press Enter to go back...") self._get_input("") def _show_eval_set_preview(self, path: Path) -> None: @@ -664,7 +662,7 @@ def _show_eval_set_preview(self, path: Path) -> None: evaluator_count = len(data.get("evaluatorRefs", [])) console.info(f" šŸ“„ {path.name}") console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") - except: + except Exception: console.info(f" šŸ“„ {path.name} (error loading)") def _show_evaluator_preview(self, path: Path) -> None: @@ -676,7 +674,7 @@ def _show_evaluator_preview(self, path: Path) -> None: type_name = self._get_type_name(data.get("type", 1)) console.info(f" šŸ“„ {path.name}") console.info(f" šŸŽÆ Type: {category} | {type_name}") - except: + except Exception: console.info(f" šŸ“„ {path.name} (error loading)") def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: @@ -699,13 +697,13 @@ def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: evaluator_refs = data.get('evaluatorRefs', []) if evaluator_refs: - console.info(f"\nšŸŽÆ Evaluator References:") + console.info("\nšŸŽÆ Evaluator References:") for ref in evaluator_refs: console.info(f" • {ref}") evaluations = data.get('evaluations', []) if evaluations: - console.info(f"\nšŸ“ Test Cases:") + console.info("\nšŸ“ Test Cases:") for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 test_name = eval_data.get('name', f'Test {i}') console.info(f" {i}. {test_name}") @@ -752,7 +750,7 @@ def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: if 'llmConfig' in data: llm_config = data['llmConfig'] - console.info(f"\nšŸ¤– LLM Configuration:") + console.info("\nšŸ¤– LLM Configuration:") console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") if 'prompt' in llm_config: prompt_preview = llm_config['prompt'][:100] @@ -1115,7 +1113,7 @@ def _create_evaluator_interactive(self) -> None: eval_type = 1 # Target Output Key - console.info(f"\nšŸ” Target Configuration") + console.info("\nšŸ” Target Configuration") console.info("Target Output Key determines which part of the output to evaluate") console.info("Examples: '*' (all), 'result', 'answer', 'output'") target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" @@ -1134,7 +1132,7 @@ def _create_evaluator_interactive(self) -> None: # LLM Configuration (if LLM as Judge) if category == 1: # LLM as Judge - console.info(f"\nšŸ¤– LLM Configuration") + console.info("\nšŸ¤– LLM Configuration") model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" console.info("šŸ“ Evaluation Prompt") @@ -1191,7 +1189,7 @@ def _get_evaluator_id(self, path: Path) -> str: with open(path) as f: data = json.load(f) return data.get("id", path.stem) - except: + except Exception: return path.stem From 71b9a0049bf5aa6064248271e58205c92a8f96fe Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:14:23 -0700 Subject: [PATCH 4/6] fix: use dynamic timestamps for eval sets and evaluators Replace hardcoded 2025-01-25 timestamps with datetime.now(timezone.utc) for createdAt and updatedAt fields in eval sets and evaluators. 
--- src/uipath/_cli/_eval_interactive.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py index 02c670c39..a0b54d650 100644 --- a/src/uipath/_cli/_eval_interactive.py +++ b/src/uipath/_cli/_eval_interactive.py @@ -5,6 +5,7 @@ import sys import termios import tty +from datetime import datetime, timezone from pathlib import Path from typing import Any, Dict, List, Optional, Tuple @@ -787,8 +788,8 @@ def _create_eval_set(self) -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), "evaluations": [] } @@ -895,8 +896,8 @@ def _create_eval_set_interactive(self) -> None: "simulateTools": False, "toolsToSimulate": [], "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } evaluations.append(evaluation) test_count += 1 @@ -915,8 +916,8 @@ def _create_eval_set_interactive(self) -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), "evaluations": evaluations } @@ -986,8 +987,8 @@ def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]] "simulateTools": False, "toolsToSimulate": [], "evalSetId": eval_set_id, - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } evaluations.append(evaluation) test_count += 1 @@ -1020,8 +1021,8 @@ def _create_evaluator(self) -> None: "category": 0, "type": 1, "targetOutputKey": "*", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } else: # JSON Similarity evaluator = { @@ -1031,8 +1032,8 @@ def _create_evaluator(self) -> None: "category": 0, "type": 6, "targetOutputKey": "*", - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') } # Ensure evaluators directory exists From 5892351610a19c9567480eea2f70eaff1a50ffcb Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 18:25:52 -0700 Subject: [PATCH 5/6] refactor: split interactive eval CLI into modular files Reorganized the monolithic _eval_interactive.py into a maintainable module structure under src/uipath/_cli/_interactive/: - __init__.py: Module exports - _main.py: Main CLI class and entry point - _navigation.py: Navigation and input handling - _discovery.py: File discovery for eval sets and evaluators - _eval_sets.py: Eval set creation and management - _evaluators.py: Evaluator creation and management - 
_execution.py: Evaluation execution utilities - _drill_down.py: Drill-down navigation views Benefits: - Easier to maintain and extend individual features - Clear separation of concerns - Better code organization - Each file has a single responsibility Updated pyproject.toml to disable misc/unused-ignore mypy errors for interactive module (known limitation with mixin pattern). --- pyproject.toml | 4 + src/uipath/_cli/_eval_interactive.py | 1200 ------------------- src/uipath/_cli/_interactive/__init__.py | 5 + src/uipath/_cli/_interactive/_discovery.py | 46 + src/uipath/_cli/_interactive/_drill_down.py | 88 ++ src/uipath/_cli/_interactive/_eval_sets.py | 329 +++++ src/uipath/_cli/_interactive/_evaluators.py | 273 +++++ src/uipath/_cli/_interactive/_execution.py | 135 +++ src/uipath/_cli/_interactive/_main.py | 193 +++ src/uipath/_cli/_interactive/_navigation.py | 109 ++ src/uipath/_cli/cli_eval.py | 2 +- 11 files changed, 1183 insertions(+), 1201 deletions(-) delete mode 100644 src/uipath/_cli/_eval_interactive.py create mode 100644 src/uipath/_cli/_interactive/__init__.py create mode 100644 src/uipath/_cli/_interactive/_discovery.py create mode 100644 src/uipath/_cli/_interactive/_drill_down.py create mode 100644 src/uipath/_cli/_interactive/_eval_sets.py create mode 100644 src/uipath/_cli/_interactive/_evaluators.py create mode 100644 src/uipath/_cli/_interactive/_execution.py create mode 100644 src/uipath/_cli/_interactive/_main.py create mode 100644 src/uipath/_cli/_interactive/_navigation.py diff --git a/pyproject.toml b/pyproject.toml index bf148234f..21f8f967a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -116,6 +116,10 @@ no_implicit_reexport = true disallow_untyped_defs = false +[[tool.mypy.overrides]] +module = "uipath._cli._interactive.*" +disable_error_code = ["misc", "unused-ignore"] + [tool.pydantic-mypy] init_forbid_extra = true init_typed = true diff --git a/src/uipath/_cli/_eval_interactive.py b/src/uipath/_cli/_eval_interactive.py deleted file mode 100644 index a0b54d650..000000000 --- a/src/uipath/_cli/_eval_interactive.py +++ /dev/null @@ -1,1200 +0,0 @@ -"""Simple interactive CLI for evaluations - keyboard only, no mouse.""" - -import json -import subprocess -import sys -import termios -import tty -from datetime import datetime, timezone -from pathlib import Path -from typing import Any, Dict, List, Optional, Tuple - -from ._utils._console import ConsoleLogger - - -def has_termios() -> bool: - """Check if we have termios support for advanced input.""" - try: - termios.tcgetattr(sys.stdin) - return True - except Exception: - return False - - -HAS_NAVIGATION = has_termios() -console = ConsoleLogger() - - -class InteractiveEvalCLI: - """Simple, fast, keyboard-driven evaluation CLI.""" - - def __init__(self, project_root: Optional[Path] = None): - self.project_root = project_root or Path.cwd() - self.eval_sets: List[Tuple[str, Path]] = [] - self.evaluators: List[Tuple[str, Path]] = [] - self.current_selection = 0 - self.menu_items = [ - "šŸ“‹ List eval sets", - "āš™ļø List evaluators", - "⚔ Quick run (auto-select)", - "āž• Create eval set", - "āž• Create evaluator", - "šŸŽÆ Run specific combination" - ] - self._discover_files() - - def _show_ascii_art(self): - """Display ASCII art banner.""" - art = """ - ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— - ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ - ā–ˆā–ˆā•‘ 
ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ - ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ - ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ - ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• - - Evaluation Builder - Interactive Evaluation Toolkit - """ - console.info(art) - - def _discover_files(self) -> None: - """Quickly discover eval sets and evaluators.""" - # Clear existing lists to avoid duplicates - self.eval_sets.clear() - self.evaluators.clear() - - # Find eval sets from evaluationSets folder - eval_sets_dir = self.project_root / "evaluationSets" - if eval_sets_dir.exists(): - for eval_file in eval_sets_dir.glob("*.json"): - try: - with open(eval_file) as f: - data = json.load(f) - # Check if it's an eval set by presence of "evaluations" array - if "evaluations" in data and isinstance(data.get("evaluations"), list): - name = data.get("name", eval_file.stem) - self.eval_sets.append((name, eval_file)) - except Exception: - pass - - # Find evaluators from evaluators folder - evaluators_dir = self.project_root / "evaluators" - if evaluators_dir.exists(): - for eval_file in evaluators_dir.glob("*.json"): - try: - with open(eval_file) as f: - data = json.load(f) - # Verify it has evaluator-specific fields - if "id" in data and "type" in data: - name = data.get("name", eval_file.stem) - self.evaluators.append((name, eval_file)) - except Exception: - pass - - def run(self) -> None: - """Run the interactive CLI.""" - self._show_ascii_art() - - if HAS_NAVIGATION: - self._run_with_navigation() - else: - self._run_basic() - - def _run_with_navigation(self) -> None: - """Run with arrow key navigation.""" - while True: - try: - self._clear_screen() - self._show_status() - self._show_navigable_menu() - - # Get key input - key = self._get_key_input() - - if key in ['q', 'Q']: - console.info("šŸ‘‹ Goodbye!") - break - elif key == 'up': - self.current_selection = (self.current_selection - 1) % len(self.menu_items) - elif key == 'down': - self.current_selection = (self.current_selection + 1) % len(self.menu_items) - elif key in ['enter', ' ']: - self._execute_menu_item_with_navigation(self.current_selection) - elif key.isdigit() and 1 <= int(key) <= len(self.menu_items): - self.current_selection = int(key) - 1 - self._execute_menu_item_with_navigation(self.current_selection) - - except KeyboardInterrupt: - console.info("\nšŸ‘‹ Goodbye!") - break - except Exception as e: - console.error(f"Error: {e}") - self._get_input("\nPress Enter to continue...") - - def _run_basic(self) -> None: - """Run basic mode without arrow keys.""" - while True: - try: - self._show_status() - self._show_main_menu() - choice = self._get_input("\nChoice (1-6, q to quit): ").strip().lower() - - if choice == 'q': - console.info("šŸ‘‹ Goodbye!") - break - elif choice.isdigit() and 1 <= int(choice) <= len(self.menu_items): - self._execute_menu_item(int(choice) - 1) - else: - console.warning("Invalid choice. 
Try again.") - - if choice in ['1', '2']: - self._get_input("\nPress Enter to continue...") - - except KeyboardInterrupt: - console.info("\nšŸ‘‹ Goodbye!") - break - except Exception as e: - console.error(f"Error: {e}") - - def _clear_screen(self) -> None: - """Clear the screen.""" - import os - os.system('cls' if os.name == 'nt' else 'clear') - self._show_ascii_art() - - def _show_status(self) -> None: - """Show project status.""" - console.info(f"šŸ“ Project: {self.project_root.name}") - console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") - console.info("─" * 65) - - def _show_navigable_menu(self) -> None: - """Show menu with current selection highlighted.""" - console.info("\nāŒØļø Navigation: ↑↓ to navigate, Enter/Space to select, 1-6 for direct, q to quit, Backspace to go back") - console.info("─" * 65) - - for i, item in enumerate(self.menu_items): - if i == self.current_selection: - console.info(f"ā–ŗ {i+1}. {item} ā—„") - else: - console.info(f" {i+1}. {item}") - - def _get_key_input(self) -> str: - """Get key input with arrow key support.""" - if not HAS_NAVIGATION: - return input("āž¤ ").strip().lower() - - try: - # Set terminal to raw mode - old_settings = termios.tcgetattr(sys.stdin) - tty.setraw(sys.stdin) - - char = sys.stdin.read(1) - - # Handle escape sequences (arrow keys) - if char == '\x1b': # ESC - char += sys.stdin.read(2) - if char == '\x1b[A': # Up arrow - return 'up' - elif char == '\x1b[B': # Down arrow - return 'down' - elif char == '\x1b[C': # Right arrow - return 'enter' - elif char == '\x1b[D': # Left arrow - return 'up' - elif char == '\r' or char == '\n': # Enter - return 'enter' - elif char == ' ': # Space - return 'enter' - elif char in ['q', 'Q']: - return 'q' - elif char == '\x7f': # Backspace (DEL) - return 'back' - elif char == '\x08': # Backspace (BS) - return 'back' - elif char.isdigit() and 1 <= int(char) <= 6: - return char - elif char == '\x03': # Ctrl+C - raise KeyboardInterrupt - - return '' - except Exception: - return input("āž¤ ").strip().lower() - finally: - # Restore terminal settings - try: - termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) - except Exception: - pass - - def _execute_menu_item_with_navigation(self, index: int) -> None: - """Execute menu item with navigation support.""" - if index == 0: - self._drill_down_eval_sets() - elif index == 1: - self._drill_down_evaluators() - elif index == 2: - self._quick_run_no_clear() - elif index == 3: - self._create_eval_set_interactive() - elif index == 4: - self._create_evaluator_interactive() - elif index == 5: - self._run_specific_navigation() - - def _execute_menu_item(self, index: int) -> None: - """Execute selected menu item (basic mode).""" - if index == 0: - self._list_eval_sets() - elif index == 1: - self._list_evaluators() - elif index == 2: - self._quick_run() - elif index == 3: - self._create_eval_set() - elif index == 4: - self._create_evaluator() - elif index == 5: - self._run_specific() - - if index in [0, 1]: - self._get_input("\nPress Enter to continue...") - - def _show_main_menu(self) -> None: - """Show main menu options.""" - console.info(f"\nšŸ“ Project: {self.project_root.name}") - console.info(f"šŸ“‹ Eval Sets: {len(self.eval_sets)} | āš™ļø Evaluators: {len(self.evaluators)}") - console.info("\n" + "─" * 50) - console.info("1. šŸ“‹ List eval sets") - console.info("2. āš™ļø List evaluators") - console.info("3. ⚔ Quick run (auto-select)") - console.info("4. āž• Create eval set") - console.info("5. 
āž• Create evaluator") - console.info("6. šŸŽÆ Run specific combination") - - def _list_eval_sets(self) -> None: - """List available evaluation sets.""" - console.info("\nšŸ“‹ Available Eval Sets:") - if not self.eval_sets: - console.warning("No eval sets found") - return - - for i, (name, path) in enumerate(self.eval_sets, 1): - # Load test count - try: - with open(path) as f: - data = json.load(f) - test_count = len(data.get("evaluations", [])) - evaluator_count = len(data.get("evaluatorRefs", [])) - console.info(f"{i}. {name}") - console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") - console.info(f" File: {path.name}") - except Exception: - console.info(f"{i}. {name} (error loading)") - - def _list_evaluators(self) -> None: - """List available evaluators.""" - console.info("\nāš™ļø Available Evaluators:") - if not self.evaluators: - console.warning("No evaluators found") - return - - for i, (name, path) in enumerate(self.evaluators, 1): - try: - with open(path) as f: - data = json.load(f) - category = self._get_category_name(data.get("category", 0)) - type_name = self._get_type_name(data.get("type", 1)) - console.info(f"{i}. {name}") - console.info(f" Type: {category} | {type_name}") - console.info(f" File: {path.name}") - except Exception: - console.info(f"{i}. {name} (error loading)") - - def _list_eval_sets_navigation(self) -> None: - """List eval sets with navigation.""" - self._clear_screen() - console.info("šŸ“‹ Available Eval Sets") - console.info("─" * 65) - self._list_eval_sets() - console.info("\nāŒØļø Press any key to go back...") - self._get_key_input() - - def _list_evaluators_navigation(self) -> None: - """List evaluators with navigation.""" - self._clear_screen() - console.info("āš™ļø Available Evaluators") - console.info("─" * 65) - self._list_evaluators() - console.info("\nāŒØļø Press any key to go back...") - self._get_key_input() - - def _quick_run(self) -> None: - """Quick run with auto-selection.""" - if not self.eval_sets: - console.error("No eval sets found!") - return - - if not self.evaluators: - console.error("No evaluators found!") - return - - console.info("\n⚔ Quick Run:") - - # Auto-select first eval set - eval_name, eval_path = self.eval_sets[0] - console.info(f"šŸ“‹ Using eval set: {eval_name}") - - # Auto-select all evaluators - console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation(eval_path) - - def _quick_run_no_clear(self) -> None: - """Quick run without clearing screen.""" - if not self.eval_sets: - console.error("No eval sets found!") - input("\nPress Enter to continue...") - return - - if not self.evaluators: - console.error("No evaluators found!") - input("\nPress Enter to continue...") - return - - console.info("\n⚔ Quick Run:") - - # Auto-select first eval set - eval_name, eval_path = self.eval_sets[0] - console.info(f"šŸ“‹ Using eval set: {eval_name}") - - # Auto-select all evaluators - console.info(f"āš™ļø Using {len(self.evaluators)} evaluators") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation_no_clear(eval_path) - - def _run_specific(self) -> None: - """Run with specific selection.""" - if not self.eval_sets or not self.evaluators: - console.error("Need both eval sets and evaluators!") - return - - # Select eval set with navigation - eval_choice = self._select_from_list(self.eval_sets, "Eval Set") - if eval_choice is None: - return - - eval_name, eval_path = self.eval_sets[eval_choice - 1] - 
console.success(f"Selected: {eval_name}") - - # Confirm and run - if self._confirm("Run evaluation now?"): - self._execute_evaluation(eval_path) - - def _run_specific_navigation(self) -> None: - """Run specific combination with navigation.""" - if not self.eval_sets or not self.evaluators: - console.error("Need both eval sets and evaluators!") - input("\nPress Enter to continue...") - return - - # Select eval set - self._clear_screen() - console.info("šŸŽÆ Select Evaluation Set") - console.info("─" * 65) - self._list_eval_sets() - - choice = input("\nāž¤ Select eval set number (or q to cancel): ").strip() - if choice.lower() == 'q': - return - - try: - eval_choice = int(choice) - if 1 <= eval_choice <= len(self.eval_sets): - eval_name, eval_path = self.eval_sets[eval_choice - 1] - console.success(f"Selected: {eval_name}") - - if self._confirm("Run evaluation now?"): - self._execute_evaluation_no_clear(eval_path) - except ValueError: - console.error("Invalid selection") - input("\nPress Enter to continue...") - - def _execute_evaluation(self, eval_path: Path) -> None: - """Execute evaluation with live results.""" - console.info("\nšŸš€ Running evaluation...") - - # Find main.py - main_py = self._find_main_py() - if not main_py: - console.error("Could not find main.py") - return - - # Build command - run from the project directory - cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", - str(main_py.relative_to(self.project_root)), - str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" - ] - - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") - - try: - # Run with real-time output from project directory - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - universal_newlines=True, - cwd=self.project_root - ) - - # Stream output in real-time - if process.stdout: - for line in process.stdout: - print(line.rstrip()) - - process.wait() - - if process.returncode == 0: - console.success("\nāœ… Evaluation completed successfully!") - else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") - - except Exception as e: - console.error(f"Failed to run evaluation: {e}") - - def _execute_evaluation_no_clear(self, eval_path: Path) -> None: - """Execute evaluation without clearing screen.""" - console.info("\nšŸš€ Running evaluation...") - - # Find main.py - main_py = self._find_main_py() - if not main_py: - console.error("Could not find main.py") - input("\nPress Enter to continue...") - return - - # Build command - run from the project directory - cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", - str(main_py.relative_to(self.project_root)), - str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" - ] - - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") - - try: - # Run with real-time output from project directory - process = subprocess.Popen( - cmd, - stdout=subprocess.PIPE, - stderr=subprocess.STDOUT, - text=True, - bufsize=1, - universal_newlines=True, - cwd=self.project_root - ) - - # Stream output in real-time - if process.stdout: - for line in process.stdout: - print(line.rstrip()) - - process.wait() - - if process.returncode == 0: - console.success("\nāœ… Evaluation completed successfully!") - else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") - - except Exception as e: - console.error(f"Failed to run evaluation: {e}") - - input("\nPress 
Enter to continue...") - - def _find_main_py(self) -> Optional[Path]: - """Find main.py file.""" - # Check current directory - main_py = self.project_root / "main.py" - if main_py.exists(): - return main_py - - # Check parent directories - for parent in self.project_root.parents: - main_py = parent / "main.py" - if main_py.exists(): - return main_py - - return None - - def _get_input(self, prompt: str) -> str: - """Get user input with prompt.""" - try: - return input(f"āž¤ {prompt}") - except KeyboardInterrupt: - raise - - def _select_from_list(self, items: List[Tuple[str, Path]], title: str) -> Optional[int]: - """Interactive list selection.""" - if not items: - console.warning(f"No {title.lower()} found") - return None - - console.info(f"\n{title}:") - for i, (name, _) in enumerate(items, 1): - console.info(f"{i}. {name}") - - try: - value = input(f"āž¤ {title} number: ") - num = int(value) - if 1 <= num <= len(items): - return num - else: - console.warning(f"Please enter a number between 1 and {len(items)}") - return None - except (ValueError, KeyboardInterrupt): - return None - - def _confirm(self, message: str) -> bool: - """Get yes/no confirmation.""" - response = self._get_input(f"{message} (y/n): ").lower() - return response in ['y', 'yes'] - - def _get_category_name(self, category: int) -> str: - """Get category name.""" - names = {0: "Deterministic", 1: "LLM Judge", 2: "Agent Scorer", 3: "Trajectory"} - return names.get(category, "Unknown") - - def _get_type_name(self, eval_type: int) -> str: - """Get type name.""" - names = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" - } - return names.get(eval_type, "Unknown") - - def _drill_down_eval_sets(self) -> None: - """Drill down into eval sets with navigation.""" - if not self.eval_sets: - self._show_no_items_screen("eval sets") - return - - current_selection = 0 - while True: - self._clear_screen() - console.info("šŸ“‹ Eval Sets - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") - console.info("─" * 65) - - for i, (name, path) in enumerate(self.eval_sets): - if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") - self._show_eval_set_preview(path) - else: - console.info(f" {i+1}. {name}") - - key = self._get_key_input() - - if key in ['q', 'Q', 'back']: - break - elif key == 'up': - current_selection = (current_selection - 1) % len(self.eval_sets) - elif key == 'down': - current_selection = (current_selection + 1) % len(self.eval_sets) - elif key in ['enter', ' ']: - self._show_eval_set_details(self.eval_sets[current_selection]) - elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): - current_selection = int(key) - 1 - - def _drill_down_evaluators(self) -> None: - """Drill down into evaluators with navigation.""" - if not self.evaluators: - self._show_no_items_screen("evaluators") - return - - current_selection = 0 - while True: - self._clear_screen() - console.info("āš™ļø Evaluators - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") - console.info("─" * 65) - - for i, (name, path) in enumerate(self.evaluators): - if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") - self._show_evaluator_preview(path) - else: - console.info(f" {i+1}. 
{name}") - - key = self._get_key_input() - - if key in ['q', 'Q', 'back']: - break - elif key == 'up': - current_selection = (current_selection - 1) % len(self.evaluators) - elif key == 'down': - current_selection = (current_selection + 1) % len(self.evaluators) - elif key in ['enter', ' ']: - self._show_evaluator_details(self.evaluators[current_selection]) - elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): - current_selection = int(key) - 1 - - def _show_no_items_screen(self, item_type: str) -> None: - """Show no items screen.""" - self._clear_screen() - console.warning(f"No {item_type} found!") - console.info("Press Enter to go back...") - self._get_input("") - - def _show_eval_set_preview(self, path: Path) -> None: - """Show eval set preview info.""" - try: - with open(path) as f: - data = json.load(f) - test_count = len(data.get("evaluations", [])) - evaluator_count = len(data.get("evaluatorRefs", [])) - console.info(f" šŸ“„ {path.name}") - console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") - except Exception: - console.info(f" šŸ“„ {path.name} (error loading)") - - def _show_evaluator_preview(self, path: Path) -> None: - """Show evaluator preview info.""" - try: - with open(path) as f: - data = json.load(f) - category = self._get_category_name(data.get("category", 0)) - type_name = self._get_type_name(data.get("type", 1)) - console.info(f" šŸ“„ {path.name}") - console.info(f" šŸŽÆ Type: {category} | {type_name}") - except Exception: - console.info(f" šŸ“„ {path.name} (error loading)") - - def _show_eval_set_details(self, eval_set_tuple: Tuple[str, Path]) -> None: - """Show detailed eval set view.""" - name, path = eval_set_tuple - self._clear_screen() - console.info(f"šŸ“‹ Eval Set Details: {name}") - console.info("─" * 65) - - try: - with open(path) as f: - data = json.load(f) - - console.info(f"šŸ“„ File: {path.name}") - console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") - console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") - console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") - console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") - console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") - - evaluator_refs = data.get('evaluatorRefs', []) - if evaluator_refs: - console.info("\nšŸŽÆ Evaluator References:") - for ref in evaluator_refs: - console.info(f" • {ref}") - - evaluations = data.get('evaluations', []) - if evaluations: - console.info("\nšŸ“ Test Cases:") - for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 - test_name = eval_data.get('name', f'Test {i}') - console.info(f" {i}. {test_name}") - if 'inputs' in eval_data: - inputs_preview = str(eval_data['inputs'])[:60] - if len(str(eval_data['inputs'])) > 60: - inputs_preview += "..." - console.info(f" Input: {inputs_preview}") - if 'expectedOutput' in eval_data: - output_preview = str(eval_data['expectedOutput'])[:60] - if len(str(eval_data['expectedOutput'])) > 60: - output_preview += "..." - console.info(f" Expected: {output_preview}") - - if len(evaluations) > 10: - console.info(f" ... 
and {len(evaluations) - 10} more tests") - - except Exception as e: - console.error(f"Error loading eval set: {e}") - - console.info("\nāŒØļø Press q/Backspace to go back...") - while True: - key = self._get_key_input() - if key in ['q', 'Q', 'back']: - break - - def _show_evaluator_details(self, evaluator_tuple: Tuple[str, Path]) -> None: - """Show detailed evaluator view.""" - name, path = evaluator_tuple - self._clear_screen() - console.info(f"āš™ļø Evaluator Details: {name}") - console.info("─" * 65) - - try: - with open(path) as f: - data = json.load(f) - - console.info(f"šŸ“„ File: {path.name}") - console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") - console.info(f"šŸ“ Description: {data.get('description', 'No description')}") - console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") - console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") - console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") - - if 'llmConfig' in data: - llm_config = data['llmConfig'] - console.info("\nšŸ¤– LLM Configuration:") - console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") - if 'prompt' in llm_config: - prompt_preview = llm_config['prompt'][:100] - if len(llm_config['prompt']) > 100: - prompt_preview += "..." - console.info(f" Prompt: {prompt_preview}") - - except Exception as e: - console.error(f"Error loading evaluator: {e}") - - console.info("\nāŒØļø Press q/Backspace to go back...") - while True: - key = self._get_key_input() - if key in ['q', 'Q', 'back']: - break - - def _create_eval_set(self) -> None: - """Create new evaluation set interactively.""" - console.info("\nāž• Create New Eval Set") - - name = self._get_input("Name: ") - if not name: - return - - # Create clean filename from name - filename = f"{name.lower().replace(' ', '_')}.json" - - # Create basic eval set - eval_set = { - "id": f"eval-{len(self.eval_sets) + 1}", - "fileName": filename, - "evaluatorRefs": [], - "name": name, - "batchSize": 10, - "timeoutMinutes": 20, - "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": [] - } - - # Ask if they want to add evaluations - add_evals = self._get_input("Add evaluations now? (y/n): ").lower() - if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) - - # Ensure evaluationSets directory exists - eval_sets_dir = self.project_root / "evaluationSets" - eval_sets_dir.mkdir(exist_ok=True) - - # Save file - file_path = eval_sets_dir / filename - - with open(file_path, 'w') as f: - json.dump(eval_set, f, indent=2) - - console.success(f"āœ… Created eval set: {filename}") - self._discover_files() # Refresh - - def _create_eval_set_interactive(self) -> None: - """Create new evaluation set with comprehensive questions.""" - self._clear_screen() - console.info("āž• Create New Eval Set - Interactive Wizard") - console.info("─" * 65) - - # Basic Information - console.info("šŸ“ Basic Information") - name = input("āž¤ Eval Set Name: ").strip() - if not name: - console.warning("Name is required!") - input("Press Enter to continue...") - return - - # Create clean filename from name - filename = f"{name.lower().replace(' ', '_')}.json" - - # Evaluator References - console.info("\nšŸŽÆ Evaluator References") - console.info("Available evaluators:") - for i, (eval_name, _) in enumerate(self.evaluators, 1): - console.info(f" {i}. 
{eval_name}") - - evaluator_refs = [] - if self.evaluators: - refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() - if refs_input.lower() == 'all': - evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] - elif refs_input: - try: - for num in refs_input.split(','): - idx = int(num.strip()) - 1 - if 0 <= idx < len(self.evaluators): - eval_path = self.evaluators[idx][1] - eval_id = self._get_evaluator_id(eval_path) - evaluator_refs.append(eval_id) - except ValueError: - console.warning("Invalid input, no evaluators selected") - - # Test Cases - console.info("\nšŸ“ Test Cases") - evaluations = [] - test_count = 1 - - while True: - console.info(f"\nTest Case #{test_count}") - test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() - if test_name.lower() == 'done': - break - - if not test_name: - console.warning("Test name is required!") - continue - - # Inputs - console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") - inputs_str = input("āž¤ Inputs: ").strip() - try: - inputs = json.loads(inputs_str) if inputs_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty inputs") - inputs = {} - - # Expected Output - console.info("šŸ“¤ Expected Output (JSON format)") - expected_str = input("āž¤ Expected Output: ").strip() - try: - expected_output = json.loads(expected_str) if expected_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty expected output") - expected_output = {} - - evaluation: Dict[str, Any] = { - "id": f"test-{test_count}", - "name": test_name, - "inputs": inputs, - "expectedOutput": expected_output, - "expectedAgentBehavior": "", - "simulationInstructions": "", - "simulateInput": False, - "inputGenerationInstructions": "", - "simulateTools": False, - "toolsToSimulate": [], - "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - evaluations.append(evaluation) - test_count += 1 - - if not evaluations: - console.warning("At least one test case is required!") - input("Press Enter to continue...") - return - - # Create eval set - eval_set = { - "id": f"eval-{len(self.eval_sets) + 1}", - "fileName": filename, - "evaluatorRefs": evaluator_refs, - "name": name, - "batchSize": 10, - "timeoutMinutes": 20, - "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": evaluations - } - - # Ensure evaluationSets directory exists - eval_sets_dir = self.project_root / "evaluationSets" - eval_sets_dir.mkdir(exist_ok=True) - - # Save file - file_path = eval_sets_dir / filename - - try: - with open(file_path, 'w') as f: - json.dump(eval_set, f, indent=2) - - console.success(f"\nāœ… Created eval set: {filename}") - console.info(f"šŸ“Š Tests: {len(evaluations)}") - console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") - - self._discover_files() # Refresh - except Exception as e: - console.error(f"Failed to create eval set: {e}") - - input("\nPress Enter to continue...") - - def _add_evaluations_interactive(self, eval_set_id: str) -> List[Dict[str, Any]]: - """Add evaluations interactively.""" - evaluations = [] - test_count = 1 - - while True: - console.info(f"\nTest Case #{test_count}") - test_name 
= self._get_input("Test Name (or 'done' to finish): ") - if test_name.lower() == 'done': - break - - if not test_name: - console.warning("Test name is required!") - continue - - # Simple inputs - console.info("Inputs (JSON format, e.g., {\"a\": 5, \"b\": 3})") - inputs_str = self._get_input("Inputs: ") - try: - inputs = json.loads(inputs_str) if inputs_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty inputs") - inputs = {} - - # Expected output - console.info("Expected Output (JSON format)") - expected_str = self._get_input("Expected Output: ") - try: - expected_output = json.loads(expected_str) if expected_str else {} - except json.JSONDecodeError: - console.warning("Invalid JSON, using empty expected output") - expected_output = {} - - evaluation: Dict[str, Any] = { - "id": f"test-{test_count}", - "name": test_name, - "inputs": inputs, - "expectedOutput": expected_output, - "expectedAgentBehavior": "", - "simulationInstructions": "", - "simulateInput": False, - "inputGenerationInstructions": "", - "simulateTools": False, - "toolsToSimulate": [], - "evalSetId": eval_set_id, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - evaluations.append(evaluation) - test_count += 1 - - return evaluations - - def _create_evaluator(self) -> None: - """Create new evaluator interactively.""" - console.info("\nāž• Create New Evaluator") - - # Select template - console.info("Templates:") - console.info("1. Exact Match") - console.info("2. JSON Similarity") - - template = self._get_number_input("Template (1-2): ", 1, 2) - if template is None: - return - - name = self._get_input("Name: ") - if not name: - return - - # Template configurations - if template == 1: - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": "Exact match evaluator", - "category": 0, - "type": 1, - "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - else: # JSON Similarity - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": "JSON similarity evaluator", - "category": 0, - "type": 6, - "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') - } - - # Ensure evaluators directory exists - evaluators_dir = self.project_root / "evaluators" - evaluators_dir.mkdir(exist_ok=True) - - # Save file - filename = f"{name.lower().replace(' ', '_')}.json" - file_path = evaluators_dir / filename - - with open(file_path, 'w') as f: - json.dump(evaluator, f, indent=2) - - console.success(f"āœ… Created evaluator: {filename}") - self._discover_files() # Refresh - - def _create_evaluator_interactive(self) -> None: - """Create new evaluator with comprehensive questions.""" - self._clear_screen() - console.info("āž• Create New Evaluator - Interactive Wizard") - console.info("─" * 65) - - # Basic Information - console.info("šŸ“ Basic Information") - name = input("āž¤ Evaluator Name: ").strip() - if not name: - console.warning("Name is required!") - input("Press Enter to continue...") - return - - description = input("āž¤ Description: ").strip() or f"{name} evaluator" - - # Category Selection - console.info("\nšŸ·ļø Category Selection") - categories = { - 0: 
"Deterministic", - 1: "LLM as Judge", - 2: "Agent Scorer", - 3: "Trajectory" - } - - for key, value in categories.items(): - console.info(f" {key}. {value}") - - try: - category = int(input("āž¤ Select Category (0-3): ") or "0") - if category not in categories: - category = 0 - except ValueError: - category = 0 - - # Type Selection - console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") - types = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" - } - - # Show relevant types based on category - relevant_types = [] - if category == 0: # Deterministic - relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity - elif category == 1: # LLM as Judge - relevant_types = [4, 5] # Factuality, Custom - elif category == 3: # Trajectory - relevant_types = [7] # Trajectory - else: - relevant_types = list(types.keys()) - - for type_id in relevant_types: - console.info(f" {type_id}. {types[type_id]}") - - try: - eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) - if eval_type not in relevant_types: - eval_type = relevant_types[0] - except (ValueError, IndexError): - eval_type = 1 - - # Target Output Key - console.info("\nšŸ” Target Configuration") - console.info("Target Output Key determines which part of the output to evaluate") - console.info("Examples: '*' (all), 'result', 'answer', 'output'") - target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" - - # Create basic evaluator - evaluator = { - "id": f"eval-{name.lower().replace(' ', '-')}", - "name": name, - "description": description, - "category": category, - "type": eval_type, - "targetOutputKey": target_key, - "createdAt": "2025-01-25T00:00:00Z", - "updatedAt": "2025-01-25T00:00:00Z" - } - - # LLM Configuration (if LLM as Judge) - if category == 1: # LLM as Judge - console.info("\nšŸ¤– LLM Configuration") - model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" - - console.info("šŸ“ Evaluation Prompt") - console.info("This prompt will be used to evaluate the agent's output") - prompt = input("āž¤ Evaluation Prompt: ").strip() - - if prompt: - evaluator["llmConfig"] = { - "modelName": model_name, - "prompt": prompt, - "temperature": 0.0, - "maxTokens": 1000 - } - - # Ensure evaluators directory exists - evaluators_dir = self.project_root / "evaluators" - evaluators_dir.mkdir(exist_ok=True) - - # Save file - filename = f"{name.lower().replace(' ', '_')}.json" - file_path = evaluators_dir / filename - - try: - with open(file_path, 'w') as f: - json.dump(evaluator, f, indent=2) - - console.success(f"\nāœ… Created evaluator: {filename}") - console.info(f"šŸ·ļø Category: {categories[category]}") - console.info(f"šŸŽÆ Type: {types[eval_type]}") - console.info(f"šŸ” Target: {target_key}") - - self._discover_files() # Refresh - except Exception as e: - console.error(f"Failed to create evaluator: {e}") - - input("\nPress Enter to continue...") - - def _get_number_input(self, prompt: str, min_val: int, max_val: int) -> Optional[int]: - """Get number input with validation.""" - try: - value = input(f"āž¤ {prompt}") - num = int(value) - if min_val <= num <= max_val: - return num - else: - console.warning(f"Please enter a number between {min_val} and {max_val}") - return None - except (ValueError, KeyboardInterrupt): - return None - - def _get_evaluator_id(self, path: Path) -> str: - """Get evaluator ID from file.""" - try: - with 
open(path) as f: - data = json.load(f) - return data.get("id", path.stem) - except Exception: - return path.stem - - -def launch_interactive_cli(project_root: Optional[Path] = None) -> None: - """Launch the interactive CLI.""" - cli = InteractiveEvalCLI(project_root) - cli.run() diff --git a/src/uipath/_cli/_interactive/__init__.py b/src/uipath/_cli/_interactive/__init__.py new file mode 100644 index 000000000..3fe5a81ab --- /dev/null +++ b/src/uipath/_cli/_interactive/__init__.py @@ -0,0 +1,5 @@ +"""Interactive evaluation CLI module.""" + +from ._main import launch_interactive_cli + +__all__ = ["launch_interactive_cli"] diff --git a/src/uipath/_cli/_interactive/_discovery.py b/src/uipath/_cli/_interactive/_discovery.py new file mode 100644 index 000000000..b0b4a68b1 --- /dev/null +++ b/src/uipath/_cli/_interactive/_discovery.py @@ -0,0 +1,46 @@ +"""Discovery utilities for finding eval sets and evaluators.""" +# type: ignore + +import json +from typing import TYPE_CHECKING + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + + +class DiscoveryMixin: + """Mixin for file discovery operations.""" + + def _discover_files(self: "InteractiveEvalCLI") -> None: + """Quickly discover eval sets and evaluators.""" + # Clear existing lists to avoid duplicates + self.eval_sets.clear() + self.evaluators.clear() + + # Find eval sets from evaluationSets folder + eval_sets_dir = self.project_root / "evaluationSets" + if eval_sets_dir.exists(): + for eval_file in eval_sets_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Check if it's an eval set by presence of "evaluations" array + if "evaluations" in data and isinstance(data.get("evaluations"), list): + name = data.get("name", eval_file.stem) + self.eval_sets.append((name, eval_file)) + except Exception: + pass + + # Find evaluators from evaluators folder + evaluators_dir = self.project_root / "evaluators" + if evaluators_dir.exists(): + for eval_file in evaluators_dir.glob("*.json"): + try: + with open(eval_file) as f: + data = json.load(f) + # Verify it has evaluator-specific fields + if "id" in data and "type" in data: + name = data.get("name", eval_file.stem) + self.evaluators.append((name, eval_file)) + except Exception: + pass diff --git a/src/uipath/_cli/_interactive/_drill_down.py b/src/uipath/_cli/_interactive/_drill_down.py new file mode 100644 index 000000000..6bdcf7453 --- /dev/null +++ b/src/uipath/_cli/_interactive/_drill_down.py @@ -0,0 +1,88 @@ +"""Drill-down navigation for eval sets and evaluators.""" +# type: ignore + +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class DrillDownMixin: + """Mixin for drill-down navigation operations.""" + + def _drill_down_eval_sets(self: "InteractiveEvalCLI") -> None: + """Drill down into eval sets with navigation.""" + if not self.eval_sets: + self._show_no_items_screen("eval sets") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("šŸ“‹ Eval Sets - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.eval_sets): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_eval_set_preview(path) + else: + console.info(f" {i+1}. 
{name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.eval_sets) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.eval_sets) + elif key in ['enter', ' ']: + self._show_eval_set_details(self.eval_sets[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): + current_selection = int(key) - 1 + + def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None: + """Drill down into evaluators with navigation.""" + if not self.evaluators: + self._show_no_items_screen("evaluators") + return + + current_selection = 0 + while True: + self._clear_screen() + console.info("āš™ļø Evaluators - Navigate & Select") + console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info("─" * 65) + + for i, (name, path) in enumerate(self.evaluators): + if i == current_selection: + console.info(f"ā–ŗ {i+1}. {name} ā—„") + self._show_evaluator_preview(path) + else: + console.info(f" {i+1}. {name}") + + key = self._get_key_input() + + if key in ['q', 'Q', 'back']: + break + elif key == 'up': + current_selection = (current_selection - 1) % len(self.evaluators) + elif key == 'down': + current_selection = (current_selection + 1) % len(self.evaluators) + elif key in ['enter', ' ']: + self._show_evaluator_details(self.evaluators[current_selection]) + elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): + current_selection = int(key) - 1 + + def _show_no_items_screen(self: "InteractiveEvalCLI", item_type: str) -> None: + """Show no items screen.""" + self._clear_screen() + console.warning(f"No {item_type} found!") + console.info("Press Enter to go back...") + self._get_input("") diff --git a/src/uipath/_cli/_interactive/_eval_sets.py b/src/uipath/_cli/_interactive/_eval_sets.py new file mode 100644 index 000000000..9f6382149 --- /dev/null +++ b/src/uipath/_cli/_interactive/_eval_sets.py @@ -0,0 +1,329 @@ +"""Eval set operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING, Any, Dict, List + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvalSetMixin: + """Mixin for eval set operations.""" + + def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set - simplified version.""" + self._clear_screen() + console.info("āž• Create New Eval Set") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Create basic eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": [], + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "evaluations": [] + } + + # Ask if they want to add evaluations + add_evals = self._get_input("Add evaluations now? 
(y/n): ").lower() + if add_evals in ['y', 'yes']: + eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"āœ… Created eval set: {filename}") + self._discover_files() # Refresh + + def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluation set with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Eval Set - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Eval Set Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + # Create clean filename from name + filename = f"{name.lower().replace(' ', '_')}.json" + + # Evaluator References + console.info("\nšŸŽÆ Evaluator References") + console.info("Available evaluators:") + for i, (eval_name, _) in enumerate(self.evaluators, 1): + console.info(f" {i}. {eval_name}") + + evaluator_refs = [] + if self.evaluators: + refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() + if refs_input.lower() == 'all': + evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + elif refs_input: + try: + for num in refs_input.split(','): + idx = int(num.strip()) - 1 + if 0 <= idx < len(self.evaluators): + eval_path = self.evaluators[idx][1] + eval_id = self._get_evaluator_id(eval_path) + evaluator_refs.append(eval_id) + except ValueError: + console.warning("Invalid input, no evaluators selected") + + # Test Cases + console.info("\nšŸ“ Test Cases") + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": f"eval-{len(self.eval_sets) + 1}", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + evaluations.append(evaluation) + test_count += 1 + + if not evaluations: + console.warning("At least one test case is required!") + input("Press Enter to continue...") + return + + 
# Create eval set + eval_set = { + "id": f"eval-{len(self.eval_sets) + 1}", + "fileName": filename, + "evaluatorRefs": evaluator_refs, + "name": name, + "batchSize": 10, + "timeoutMinutes": 20, + "modelSettings": [], + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "evaluations": evaluations + } + + # Ensure evaluationSets directory exists + eval_sets_dir = self.project_root / "evaluationSets" + eval_sets_dir.mkdir(exist_ok=True) + + # Save file + file_path = eval_sets_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(eval_set, f, indent=2) + + console.success(f"\nāœ… Created eval set: {filename}") + console.info(f"šŸ“Š Tests: {len(evaluations)}") + console.info(f"āš™ļø Evaluators: {len(evaluator_refs)}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create eval set: {e}") + + input("\nPress Enter to continue...") + + def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -> List[Dict[str, Any]]: + """Add evaluations interactively.""" + evaluations = [] + test_count = 1 + + while True: + console.info(f"\nTest Case #{test_count}") + test_name = self._get_input("Test Name (or 'done' to finish): ") + if test_name.lower() == 'done': + break + + if not test_name: + console.warning("Test name is required!") + continue + + # Inputs + console.info("šŸ“„ Inputs (JSON format)") + console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + inputs_str = input("āž¤ Inputs: ").strip() + try: + inputs = json.loads(inputs_str) if inputs_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty inputs") + inputs = {} + + # Expected Output + console.info("šŸ“¤ Expected Output (JSON format)") + expected_str = input("āž¤ Expected Output: ").strip() + try: + expected_output = json.loads(expected_str) if expected_str else {} + except json.JSONDecodeError: + console.warning("Invalid JSON, using empty expected output") + expected_output = {} + + evaluation: Dict[str, Any] = { + "id": f"test-{test_count}", + "name": test_name, + "inputs": inputs, + "expectedOutput": expected_output, + "expectedAgentBehavior": "", + "simulationInstructions": "", + "simulateInput": False, + "inputGenerationInstructions": "", + "simulateTools": False, + "toolsToSimulate": [], + "evalSetId": eval_set_id, + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + evaluations.append(evaluation) + test_count += 1 + + return evaluations + + def _list_eval_sets(self: "InteractiveEvalCLI") -> None: + """List available eval sets.""" + console.info("\nšŸ“‹ Available Eval Sets:") + if not self.eval_sets: + console.warning("No eval sets found") + return + + for i, (name, path) in enumerate(self.eval_sets, 1): + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f"{i}. {name}") + console.info(f" Tests: {test_count} | Evaluators: {evaluator_count}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. 
{name} (error loading)") + + def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show eval set preview info.""" + try: + with open(path) as f: + data = json.load(f) + test_count = len(data.get("evaluations", [])) + evaluator_count = len(data.get("evaluatorRefs", [])) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸ“Š Tests: {test_count} | Evaluators: {evaluator_count}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path]) -> None: + """Show detailed eval set view.""" + name, path = eval_set_tuple + self._clear_screen() + console.info(f"šŸ“‹ Eval Set Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“Š Tests: {len(data.get('evaluations', []))}") + console.info(f"āš™ļø Evaluators: {len(data.get('evaluatorRefs', []))}") + console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") + console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") + + evaluator_refs = data.get('evaluatorRefs', []) + if evaluator_refs: + console.info("\nšŸŽÆ Evaluator References:") + for ref in evaluator_refs: + console.info(f" • {ref}") + + evaluations = data.get('evaluations', []) + if evaluations: + console.info("\nšŸ“ Test Cases:") + for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 + test_name = eval_data.get('name', f'Test {i}') + console.info(f" {i}. {test_name}") + if 'inputs' in eval_data: + inputs_preview = str(eval_data['inputs'])[:60] + if len(str(eval_data['inputs'])) > 60: + inputs_preview += "..." + console.info(f" Input: {inputs_preview}") + if 'expectedOutput' in eval_data: + output_preview = str(eval_data['expectedOutput'])[:60] + if len(str(eval_data['expectedOutput'])) > 60: + output_preview += "..." + console.info(f" Expected: {output_preview}") + + if len(evaluations) > 10: + console.info(f"\n ... 
and {len(evaluations) - 10} more tests") + + except Exception as e: + console.error(f"Error loading eval set: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() diff --git a/src/uipath/_cli/_interactive/_evaluators.py b/src/uipath/_cli/_interactive/_evaluators.py new file mode 100644 index 000000000..bb2968569 --- /dev/null +++ b/src/uipath/_cli/_interactive/_evaluators.py @@ -0,0 +1,273 @@ +"""Evaluator operations for interactive CLI.""" +# type: ignore + +import json +from datetime import datetime, timezone +from pathlib import Path +from typing import TYPE_CHECKING + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class EvaluatorMixin: + """Mixin for evaluator operations.""" + + def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: + """Create new evaluator - simplified version.""" + self._clear_screen() + console.info("āž• Create New Evaluator") + console.info("─" * 65) + + name = self._get_input("Name: ") + if not name: + return + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": f"{name} evaluator", + "category": 0, + "type": 1, + "targetOutputKey": "*", + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"āœ… Created evaluator: {filename}") + self._discover_files() # Refresh + + def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: + """Create new evaluator with comprehensive questions.""" + self._clear_screen() + console.info("āž• Create New Evaluator - Interactive Wizard") + console.info("─" * 65) + + # Basic Information + console.info("šŸ“ Basic Information") + name = input("āž¤ Evaluator Name: ").strip() + if not name: + console.warning("Name is required!") + input("Press Enter to continue...") + return + + description = input("āž¤ Description: ").strip() or f"{name} evaluator" + + # Category Selection + console.info("\nšŸ·ļø Category Selection") + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + + for key, value in categories.items(): + console.info(f" {key}. {value}") + + try: + category = int(input("āž¤ Select Category (0-3): ") or "0") + if category not in categories: + category = 0 + except ValueError: + category = 0 + + # Type Selection + console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") + types = { + 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", + 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + } + + # Show relevant types based on category + relevant_types = [] + if category == 0: # Deterministic + relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + elif category == 1: # LLM as Judge + relevant_types = [4, 5] # Factuality, Custom + elif category == 3: # Trajectory + relevant_types = [7] # Trajectory + else: + relevant_types = list(types.keys()) + + for type_id in relevant_types: + console.info(f" {type_id}. 
{types[type_id]}") + + try: + eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + if eval_type not in relevant_types: + eval_type = relevant_types[0] + except (ValueError, IndexError): + eval_type = 1 + + # Target Output Key + console.info("\nšŸ” Target Configuration") + console.info("Target Output Key determines which part of the output to evaluate") + console.info("Examples: '*' (all), 'result', 'answer', 'output'") + target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" + + # Create basic evaluator + evaluator = { + "id": f"eval-{name.lower().replace(' ', '-')}", + "name": name, + "description": description, + "category": category, + "type": eval_type, + "targetOutputKey": target_key, + "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), + "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + } + + # LLM Configuration (if LLM as Judge) + if category == 1: # LLM as Judge + console.info("\nšŸ¤– LLM Configuration") + model_name = input("āž¤ Model Name (default: gpt-4): ").strip() or "gpt-4" + + console.info("šŸ“ Evaluation Prompt") + console.info("This prompt will be used to evaluate the agent's output") + prompt = input("āž¤ Evaluation Prompt: ").strip() + + if prompt: + evaluator["llmConfig"] = { + "modelName": model_name, + "prompt": prompt, + "temperature": 0.0, + "maxTokens": 1000 + } + + # Ensure evaluators directory exists + evaluators_dir = self.project_root / "evaluators" + evaluators_dir.mkdir(exist_ok=True) + + # Save file + filename = f"{name.lower().replace(' ', '_')}.json" + file_path = evaluators_dir / filename + + try: + with open(file_path, 'w') as f: + json.dump(evaluator, f, indent=2) + + console.success(f"\nāœ… Created evaluator: {filename}") + console.info(f"šŸ·ļø Category: {categories[category]}") + console.info(f"šŸŽÆ Type: {types[eval_type]}") + console.info(f"šŸ” Target: {target_key}") + + self._discover_files() # Refresh + except Exception as e: + console.error(f"Failed to create evaluator: {e}") + + input("\nPress Enter to continue...") + + def _list_evaluators(self: "InteractiveEvalCLI") -> None: + """List available evaluators.""" + console.info("\nāš™ļø Available Evaluators:") + if not self.evaluators: + console.warning("No evaluators found") + return + + for i, (name, path) in enumerate(self.evaluators, 1): + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f"{i}. {name}") + console.info(f" Type: {category} | {type_name}") + console.info(f" File: {path.name}") + except Exception: + console.info(f"{i}. 
{name} (error loading)") + + def _show_evaluator_preview(self: "InteractiveEvalCLI", path: Path) -> None: + """Show evaluator preview info.""" + try: + with open(path) as f: + data = json.load(f) + category = self._get_category_name(data.get("category", 0)) + type_name = self._get_type_name(data.get("type", 1)) + console.info(f" šŸ“„ {path.name}") + console.info(f" šŸŽÆ Type: {category} | {type_name}") + except Exception: + console.info(f" šŸ“„ {path.name} (error loading)") + + def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path]) -> None: + """Show detailed evaluator view.""" + name, path = evaluator_tuple + self._clear_screen() + console.info(f"āš™ļø Evaluator Details: {name}") + console.info("─" * 65) + + try: + with open(path) as f: + data = json.load(f) + + console.info(f"\nšŸ“„ {path.name}") + console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") + console.info(f"šŸ“ Description: {data.get('description', 'No description')}") + console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") + console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") + + if 'llmConfig' in data: + llm_config = data['llmConfig'] + console.info("\nšŸ¤– LLM Configuration:") + console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") + if 'prompt' in llm_config: + prompt_preview = llm_config['prompt'][:100] + if len(llm_config['prompt']) > 100: + prompt_preview += "..." + console.info(f" Prompt: {prompt_preview}") + + except Exception as e: + console.error(f"Error loading evaluator: {e}") + + console.info("\nšŸ’” Press Backspace to go back") + self._get_key_input() + + def _get_category_name(self: "InteractiveEvalCLI", category: int) -> str: + """Get category name from number.""" + categories = { + 0: "Deterministic", + 1: "LLM as Judge", + 2: "Agent Scorer", + 3: "Trajectory" + } + return categories.get(category, "Unknown") + + def _get_type_name(self: "InteractiveEvalCLI", eval_type: int) -> str: + """Get type name from number.""" + types = { + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory" + } + return types.get(eval_type, "Unknown") + + def _get_evaluator_id(self: "InteractiveEvalCLI", path: Path) -> str: + """Get evaluator ID from file.""" + try: + with open(path) as f: + data = json.load(f) + return data.get("id", path.stem) + except Exception: + return path.stem diff --git a/src/uipath/_cli/_interactive/_execution.py b/src/uipath/_cli/_interactive/_execution.py new file mode 100644 index 000000000..f2283bc2b --- /dev/null +++ b/src/uipath/_cli/_interactive/_execution.py @@ -0,0 +1,135 @@ +"""Execution utilities for running evaluations.""" +# type: ignore + +import subprocess +import sys +from pathlib import Path +from typing import TYPE_CHECKING, Optional + +from .._utils._console import ConsoleLogger + +if TYPE_CHECKING: + from ._main import InteractiveEvalCLI + +console = ConsoleLogger() + + +class ExecutionMixin: + """Mixin for execution operations.""" + + def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: + """Execute evaluation with live results.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + 
str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> None: + """Execute evaluation without clearing screen.""" + console.info("\nšŸš€ Running evaluation...") + + # Find main.py + main_py = self._find_main_py() + if not main_py: + console.error("Could not find main.py") + input("\nPress Enter to continue...") + return + + # Build command - run from the project directory + cmd = [ + sys.executable, "-m", "uipath._cli.cli_eval", + str(main_py.relative_to(self.project_root)), + str(eval_path.relative_to(self.project_root)), + "--no-report", "--workers", "1" + ] + + console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + + try: + # Run with real-time output from project directory + process = subprocess.Popen( + cmd, + stdout=subprocess.PIPE, + stderr=subprocess.STDOUT, + text=True, + bufsize=1, + universal_newlines=True, + cwd=self.project_root + ) + + # Stream output in real-time + if process.stdout: + for line in process.stdout: + print(line.rstrip()) + + process.wait() + + if process.returncode == 0: + console.success("\nāœ… Evaluation completed successfully!") + else: + console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + + except Exception as e: + console.error(f"Failed to run evaluation: {e}") + + input("\nPress Enter to continue...") + + def _find_main_py(self: "InteractiveEvalCLI") -> Optional[Path]: + """Find main.py file.""" + # Check current directory + main_py = self.project_root / "main.py" + if main_py.exists(): + return main_py + + # Check parent directories + for parent in self.project_root.parents: + main_py = parent / "main.py" + if main_py.exists(): + return main_py + + return None + + def _confirm(self: "InteractiveEvalCLI", prompt: str) -> bool: + """Ask for confirmation.""" + response = self._get_input(f"{prompt} (y/n): ").lower() + return response in ['y', 'yes'] diff --git a/src/uipath/_cli/_interactive/_main.py b/src/uipath/_cli/_interactive/_main.py new file mode 100644 index 000000000..c15afc2e0 --- /dev/null +++ b/src/uipath/_cli/_interactive/_main.py @@ -0,0 +1,193 @@ +"""Main interactive CLI for evaluations.""" + +from pathlib import Path +from typing import List, Optional, Tuple + +from .._utils._console import ConsoleLogger +from ._discovery import DiscoveryMixin +from ._drill_down import DrillDownMixin +from ._eval_sets import EvalSetMixin +from ._evaluators import EvaluatorMixin +from ._execution import ExecutionMixin +from ._navigation import HAS_NAVIGATION, NavigationMixin + +console = ConsoleLogger() + + +class InteractiveEvalCLI( + NavigationMixin, + DiscoveryMixin, + EvalSetMixin, + EvaluatorMixin, + ExecutionMixin, + DrillDownMixin +): + 
"""Simple, fast, keyboard-driven evaluation CLI.""" + + def __init__(self, project_root: Optional[Path] = None): + self.project_root = project_root or Path.cwd() + self.eval_sets: List[Tuple[str, Path]] = [] + self.evaluators: List[Tuple[str, Path]] = [] + self.current_selection = 0 + self.menu_items = [ + "šŸ“‹ List eval sets", + "āš™ļø List evaluators", + "⚔ Quick run (auto-select)", + "āž• Create eval set", + "āž• Create evaluator", + "šŸŽÆ Run specific combination" + ] + self._discover_files() + + def run(self) -> None: + """Run the interactive CLI.""" + self._show_ascii_art() + + if not HAS_NAVIGATION: + console.warning("āš ļø Terminal navigation not available. Using fallback mode.") + console.info("Consider using a standard terminal for better experience.\n") + self._run_fallback_mode() + return + + try: + self._run_navigation_mode() + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + + def _run_navigation_mode(self) -> None: + """Run with arrow key navigation.""" + while True: + self._clear_screen() + self._show_ascii_art() + self._show_menu(self.current_selection, self.menu_items) + + key = self._get_key_input() + + if key == 'up': + self.current_selection = (self.current_selection - 1) % len(self.menu_items) + elif key == 'down': + self.current_selection = (self.current_selection + 1) % len(self.menu_items) + elif key in ['enter', ' ']: + self._execute_menu_item_with_navigation(self.current_selection) + elif key.isdigit() and 1 <= int(key) <= 6: + self._execute_menu_item_with_navigation(int(key) - 1) + + def _execute_menu_item_with_navigation(self, index: int) -> None: + """Execute menu item with navigation support.""" + if index == 0: + self._drill_down_eval_sets() + elif index == 1: + self._drill_down_evaluators() + elif index == 2: + self._quick_run_with_navigation() + elif index == 3: + self._create_eval_set_interactive() + elif index == 4: + self._create_evaluator_interactive() + elif index == 5: + self._run_specific_combination() + + def _run_fallback_mode(self) -> None: + """Run without navigation - simple text interface.""" + while True: + console.info("\nāš™ļø Main Menu:") + for i, item in enumerate(self.menu_items, 1): + console.info(f" {i}. {item}") + console.info(" 0. 
Exit") + + try: + choice = input("\nāž¤ Select option: ").strip() + + if choice == '0': + console.info("šŸ‘‹ Goodbye!") + break + elif choice == '1': + self._list_eval_sets_navigation() + elif choice == '2': + self._list_evaluators() + elif choice == '3': + self._quick_run() + elif choice == '4': + self._create_eval_set_simple() + elif choice == '5': + self._create_evaluator_simple() + elif choice == '6': + self._run_specific_combination() + else: + console.warning("Invalid option") + except KeyboardInterrupt: + console.info("\nšŸ‘‹ Goodbye!") + break + + def _quick_run_with_navigation(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + self._clear_screen() + console.warning("No eval sets found!") + console.info("Press Enter to go back...") + self._get_input("") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + + self._clear_screen() + console.info(f"⚔ Quick Run: {eval_name}") + console.info("─" * 65) + + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + + def _quick_run(self) -> None: + """Quick run evaluation with auto-selected eval set.""" + if not self.eval_sets: + console.warning("No eval sets found!") + return + + # Use first eval set + eval_name, eval_path = self.eval_sets[0] + console.info(f"\n⚔ Quick Run: {eval_name}") + + if self._confirm("Run evaluation now?"): + self._execute_evaluation(eval_path) + + def _list_eval_sets_navigation(self) -> None: + """List eval sets with navigation.""" + self._clear_screen() + console.info("šŸ“‹ Available Eval Sets") + console.info("─" * 65) + self._list_eval_sets() + input("\nPress Enter to continue...") + + def _run_specific_combination(self) -> None: + """Run specific eval set and evaluator combination.""" + self._clear_screen() + console.info("šŸŽÆ Run Specific Combination") + console.info("─" * 65) + + # Select eval set + console.info("\nšŸ“‹ Select Eval Set:") + for i, (name, _) in enumerate(self.eval_sets, 1): + console.info(f" {i}. 
{name}") + + try: + eval_idx = int(input("\nāž¤ Eval Set Number: ").strip()) - 1 + if not (0 <= eval_idx < len(self.eval_sets)): + console.error("Invalid selection") + input("\nPress Enter to continue...") + return + + eval_name, eval_path = self.eval_sets[eval_idx] + + console.info(f"\nāœ… Selected: {eval_name}") + if self._confirm("Run evaluation now?"): + self._execute_evaluation_no_clear(eval_path) + except ValueError: + console.error("Invalid selection") + input("\nPress Enter to continue...") + + +def launch_interactive_cli(project_root: Optional[Path] = None) -> None: + """Launch the interactive CLI.""" + cli = InteractiveEvalCLI(project_root) + cli.run() diff --git a/src/uipath/_cli/_interactive/_navigation.py b/src/uipath/_cli/_interactive/_navigation.py new file mode 100644 index 000000000..66514716b --- /dev/null +++ b/src/uipath/_cli/_interactive/_navigation.py @@ -0,0 +1,109 @@ +"""Navigation and input handling for interactive CLI.""" + +import sys +import termios +import tty + +from .._utils._console import ConsoleLogger + +console = ConsoleLogger() + + +def has_termios() -> bool: + """Check if we have termios support for advanced input.""" + try: + termios.tcgetattr(sys.stdin) + return True + except Exception: + return False + + +HAS_NAVIGATION = has_termios() + + +class NavigationMixin: + """Mixin for navigation and input handling.""" + + def _clear_screen(self) -> None: + """Clear the screen.""" + print("\033[2J\033[H", end="") + + def _get_input(self, prompt: str) -> str: + """Get input from user.""" + return input(prompt).strip() + + def _get_key_input(self) -> str: + """Get key input with arrow key support.""" + if not HAS_NAVIGATION: + return input("āž¤ ").strip().lower() + + old_settings = termios.tcgetattr(sys.stdin) + try: + tty.setraw(sys.stdin) + + # Read first character + char = sys.stdin.read(1) + + # Check for escape sequences (arrow keys) + if char == '\x1b': # ESC + next_char = sys.stdin.read(1) + if next_char == '[': + arrow = sys.stdin.read(1) + if arrow == 'A': + return 'up' + elif arrow == 'B': + return 'down' + return '' + + # Backspace handling + if char == '\x7f': # Backspace (DEL) + return 'back' + elif char == '\x08': # Backspace (BS) + return 'back' + + # Enter key + if char in ['\r', '\n']: + return 'enter' + + # Digit keys + elif char.isdigit() and 1 <= int(char) <= 6: + return char + elif char == '\x03': # Ctrl+C + raise KeyboardInterrupt + + return '' + except Exception: + return input("āž¤ ").strip().lower() + finally: + # Restore terminal settings + try: + termios.tcsetattr(sys.stdin, termios.TCSADRAIN, old_settings) + except Exception: + pass + + def _show_ascii_art(self) -> None: + """Display ASCII art banner.""" + art = """ + ā–ˆā–ˆā•— ā–ˆā–ˆā•—ā–ˆā–ˆā•—ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā•— ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•—ā–ˆā–ˆā•— ā–ˆā–ˆā•— + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•—ā•šā•ā•ā–ˆā–ˆā•”ā•ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•‘ + ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ā–ˆā–ˆā•”ā•ā•ā•ā• ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•”ā•ā•ā–ˆā–ˆā•‘ + ā•šā–ˆā–ˆā–ˆā–ˆā–ˆā–ˆā•”ā•ā–ˆā–ˆā•‘ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ ā–ˆā–ˆā•‘ + ā•šā•ā•ā•ā•ā•ā• ā•šā•ā•ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• ā•šā•ā• + + Evaluation Builder + Interactive Evaluation Toolkit + """ + console.info(art) + + def _show_menu(self, current_selection: int, menu_items: list[str]) -> None: + """Show 
menu with current selection highlighted.""" + console.info("\nāš™ļø Main Menu:") + console.info("─" * 65) + for i, item in enumerate(menu_items): + if i == current_selection: + console.info(f" ā–¶ {item}") + else: + console.info(f" {item}") + console.info("\nšŸ’” Use ↑/↓ arrows to navigate, Enter to select, or type 1-6") + console.info("Press Ctrl+C to exit") diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 70debd662..546d6dd63 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -139,7 +139,7 @@ def eval( # Handle interactive mode if interactive: try: - from ._eval_interactive import launch_interactive_cli + from ._interactive import launch_interactive_cli launch_interactive_cli() return except ImportError as e: From d18dd75ebeef81d156c4c6f143378a1a76f15479 Mon Sep 17 00:00:00 2001 From: Chibi Vikramathithan Date: Tue, 30 Sep 2025 21:03:17 -0700 Subject: [PATCH 6/6] fix: apply ruff formatting to interactive module - Formatted all files in _interactive/ module with ruff - Ensures consistent code style across the codebase --- src/uipath/_cli/_interactive/_discovery.py | 4 +- src/uipath/_cli/_interactive/_drill_down.py | 32 ++++---- src/uipath/_cli/_interactive/_eval_sets.py | 84 +++++++++++++-------- src/uipath/_cli/_interactive/_evaluators.py | 64 ++++++++++------ src/uipath/_cli/_interactive/_execution.py | 42 ++++++++--- src/uipath/_cli/_interactive/_main.py | 36 +++++---- src/uipath/_cli/_interactive/_navigation.py | 30 ++++---- src/uipath/_cli/cli_eval.py | 5 +- 8 files changed, 184 insertions(+), 113 deletions(-) diff --git a/src/uipath/_cli/_interactive/_discovery.py b/src/uipath/_cli/_interactive/_discovery.py index b0b4a68b1..08ea55d84 100644 --- a/src/uipath/_cli/_interactive/_discovery.py +++ b/src/uipath/_cli/_interactive/_discovery.py @@ -25,7 +25,9 @@ def _discover_files(self: "InteractiveEvalCLI") -> None: with open(eval_file) as f: data = json.load(f) # Check if it's an eval set by presence of "evaluations" array - if "evaluations" in data and isinstance(data.get("evaluations"), list): + if "evaluations" in data and isinstance( + data.get("evaluations"), list + ): name = data.get("name", eval_file.stem) self.eval_sets.append((name, eval_file)) except Exception: diff --git a/src/uipath/_cli/_interactive/_drill_down.py b/src/uipath/_cli/_interactive/_drill_down.py index 6bdcf7453..a200054b0 100644 --- a/src/uipath/_cli/_interactive/_drill_down.py +++ b/src/uipath/_cli/_interactive/_drill_down.py @@ -24,25 +24,27 @@ def _drill_down_eval_sets(self: "InteractiveEvalCLI") -> None: while True: self._clear_screen() console.info("šŸ“‹ Eval Sets - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) console.info("─" * 65) for i, (name, path) in enumerate(self.eval_sets): if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") + console.info(f"ā–ŗ {i + 1}. {name} ā—„") self._show_eval_set_preview(path) else: - console.info(f" {i+1}. {name}") + console.info(f" {i + 1}. 
{name}") key = self._get_key_input() - if key in ['q', 'Q', 'back']: + if key in ["q", "Q", "back"]: break - elif key == 'up': + elif key == "up": current_selection = (current_selection - 1) % len(self.eval_sets) - elif key == 'down': + elif key == "down": current_selection = (current_selection + 1) % len(self.eval_sets) - elif key in ['enter', ' ']: + elif key in ["enter", " "]: self._show_eval_set_details(self.eval_sets[current_selection]) elif key.isdigit() and 1 <= int(key) <= len(self.eval_sets): current_selection = int(key) - 1 @@ -57,25 +59,27 @@ def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None: while True: self._clear_screen() console.info("āš™ļø Evaluators - Navigate & Select") - console.info("āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back") + console.info( + "āŒØļø Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back" + ) console.info("─" * 65) for i, (name, path) in enumerate(self.evaluators): if i == current_selection: - console.info(f"ā–ŗ {i+1}. {name} ā—„") + console.info(f"ā–ŗ {i + 1}. {name} ā—„") self._show_evaluator_preview(path) else: - console.info(f" {i+1}. {name}") + console.info(f" {i + 1}. {name}") key = self._get_key_input() - if key in ['q', 'Q', 'back']: + if key in ["q", "Q", "back"]: break - elif key == 'up': + elif key == "up": current_selection = (current_selection - 1) % len(self.evaluators) - elif key == 'down': + elif key == "down": current_selection = (current_selection + 1) % len(self.evaluators) - elif key in ['enter', ' ']: + elif key in ["enter", " "]: self._show_evaluator_details(self.evaluators[current_selection]) elif key.isdigit() and 1 <= int(key) <= len(self.evaluators): current_selection = int(key) - 1 diff --git a/src/uipath/_cli/_interactive/_eval_sets.py b/src/uipath/_cli/_interactive/_eval_sets.py index 9f6382149..2ac1da8df 100644 --- a/src/uipath/_cli/_interactive/_eval_sets.py +++ b/src/uipath/_cli/_interactive/_eval_sets.py @@ -39,15 +39,17 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": [] + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": [], } # Ask if they want to add evaluations add_evals = self._get_input("Add evaluations now? 
(y/n): ").lower() - if add_evals in ['y', 'yes']: - eval_set["evaluations"] = self._add_evaluations_interactive(str(eval_set["id"])) + if add_evals in ["y", "yes"]: + eval_set["evaluations"] = self._add_evaluations_interactive( + str(eval_set["id"]) + ) # Ensure evaluationSets directory exists eval_sets_dir = self.project_root / "evaluationSets" @@ -56,7 +58,7 @@ def _create_eval_set_simple(self: "InteractiveEvalCLI") -> None: # Save file file_path = eval_sets_dir / filename - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(eval_set, f, indent=2) console.success(f"āœ… Created eval set: {filename}") @@ -87,12 +89,16 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: evaluator_refs = [] if self.evaluators: - refs_input = input("āž¤ Select evaluators (comma-separated numbers, or 'all'): ").strip() - if refs_input.lower() == 'all': - evaluator_refs = [self._get_evaluator_id(path) for eval_name, path in self.evaluators] + refs_input = input( + "āž¤ Select evaluators (comma-separated numbers, or 'all'): " + ).strip() + if refs_input.lower() == "all": + evaluator_refs = [ + self._get_evaluator_id(path) for eval_name, path in self.evaluators + ] elif refs_input: try: - for num in refs_input.split(','): + for num in refs_input.split(","): idx = int(num.strip()) - 1 if 0 <= idx < len(self.evaluators): eval_path = self.evaluators[idx][1] @@ -109,7 +115,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: while True: console.info(f"\nTest Case #{test_count}") test_name = input("āž¤ Test Name (or 'done' to finish): ").strip() - if test_name.lower() == 'done': + if test_name.lower() == "done": break if not test_name: @@ -118,7 +124,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: # Inputs console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') inputs_str = input("āž¤ Inputs: ").strip() try: inputs = json.loads(inputs_str) if inputs_str else {} @@ -147,8 +153,12 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: "simulateTools": False, "toolsToSimulate": [], "evalSetId": f"eval-{len(self.eval_sets) + 1}", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), } evaluations.append(evaluation) test_count += 1 @@ -167,9 +177,9 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: "batchSize": 10, "timeoutMinutes": 20, "modelSettings": [], - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "evaluations": evaluations + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "evaluations": evaluations, } # Ensure evaluationSets directory exists @@ -180,7 +190,7 @@ def _create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: file_path = eval_sets_dir / filename try: - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(eval_set, f, indent=2) console.success(f"\nāœ… Created eval set: {filename}") @@ -193,7 +203,9 @@ def 
_create_eval_set_interactive(self: "InteractiveEvalCLI") -> None: input("\nPress Enter to continue...") - def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) -> List[Dict[str, Any]]: + def _add_evaluations_interactive( + self: "InteractiveEvalCLI", eval_set_id: str + ) -> List[Dict[str, Any]]: """Add evaluations interactively.""" evaluations = [] test_count = 1 @@ -201,7 +213,7 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - while True: console.info(f"\nTest Case #{test_count}") test_name = self._get_input("Test Name (or 'done' to finish): ") - if test_name.lower() == 'done': + if test_name.lower() == "done": break if not test_name: @@ -210,7 +222,7 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - # Inputs console.info("šŸ“„ Inputs (JSON format)") - console.info("Examples: {\"a\": 5, \"b\": 3} or {\"query\": \"hello world\"}") + console.info('Examples: {"a": 5, "b": 3} or {"query": "hello world"}') inputs_str = input("āž¤ Inputs: ").strip() try: inputs = json.loads(inputs_str) if inputs_str else {} @@ -239,8 +251,12 @@ def _add_evaluations_interactive(self: "InteractiveEvalCLI", eval_set_id: str) - "simulateTools": False, "toolsToSimulate": [], "evalSetId": eval_set_id, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc) + .isoformat() + .replace("+00:00", "Z"), } evaluations.append(evaluation) test_count += 1 @@ -278,7 +294,9 @@ def _show_eval_set_preview(self: "InteractiveEvalCLI", path: Path) -> None: except Exception: console.info(f" šŸ“„ {path.name} (error loading)") - def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path]) -> None: + def _show_eval_set_details( + self: "InteractiveEvalCLI", eval_set_tuple: tuple[str, Path] + ) -> None: """Show detailed eval set view.""" name, path = eval_set_tuple self._clear_screen() @@ -296,26 +314,26 @@ def _show_eval_set_details(self: "InteractiveEvalCLI", eval_set_tuple: tuple[str console.info(f"šŸ“¦ Batch Size: {data.get('batchSize', 'Unknown')}") console.info(f"ā±ļø Timeout: {data.get('timeoutMinutes', 'Unknown')} minutes") - evaluator_refs = data.get('evaluatorRefs', []) + evaluator_refs = data.get("evaluatorRefs", []) if evaluator_refs: console.info("\nšŸŽÆ Evaluator References:") for ref in evaluator_refs: console.info(f" • {ref}") - evaluations = data.get('evaluations', []) + evaluations = data.get("evaluations", []) if evaluations: console.info("\nšŸ“ Test Cases:") for i, eval_data in enumerate(evaluations[:10], 1): # Show first 10 - test_name = eval_data.get('name', f'Test {i}') + test_name = eval_data.get("name", f"Test {i}") console.info(f" {i}. {test_name}") - if 'inputs' in eval_data: - inputs_preview = str(eval_data['inputs'])[:60] - if len(str(eval_data['inputs'])) > 60: + if "inputs" in eval_data: + inputs_preview = str(eval_data["inputs"])[:60] + if len(str(eval_data["inputs"])) > 60: inputs_preview += "..." console.info(f" Input: {inputs_preview}") - if 'expectedOutput' in eval_data: - output_preview = str(eval_data['expectedOutput'])[:60] - if len(str(eval_data['expectedOutput'])) > 60: + if "expectedOutput" in eval_data: + output_preview = str(eval_data["expectedOutput"])[:60] + if len(str(eval_data["expectedOutput"])) > 60: output_preview += "..." 
console.info(f" Expected: {output_preview}") diff --git a/src/uipath/_cli/_interactive/_evaluators.py b/src/uipath/_cli/_interactive/_evaluators.py index bb2968569..541a5bbf1 100644 --- a/src/uipath/_cli/_interactive/_evaluators.py +++ b/src/uipath/_cli/_interactive/_evaluators.py @@ -35,8 +35,8 @@ def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: "category": 0, "type": 1, "targetOutputKey": "*", - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), } # Ensure evaluators directory exists @@ -47,7 +47,7 @@ def _create_evaluator_simple(self: "InteractiveEvalCLI") -> None: filename = f"{name.lower().replace(' ', '_')}.json" file_path = evaluators_dir / filename - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(evaluator, f, indent=2) console.success(f"āœ… Created evaluator: {filename}") @@ -75,7 +75,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: 0: "Deterministic", 1: "LLM as Judge", 2: "Agent Scorer", - 3: "Trajectory" + 3: "Trajectory", } for key, value in categories.items(): @@ -91,14 +91,25 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: # Type Selection console.info(f"\nšŸŽÆ Type Selection (Category: {categories[category]})") types = { - 0: "Unknown", 1: "Exact Match", 2: "Contains", 3: "Regex", - 4: "Factuality", 5: "Custom", 6: "JSON Similarity", 7: "Trajectory" + 0: "Unknown", + 1: "Exact Match", + 2: "Contains", + 3: "Regex", + 4: "Factuality", + 5: "Custom", + 6: "JSON Similarity", + 7: "Trajectory", } # Show relevant types based on category relevant_types = [] if category == 0: # Deterministic - relevant_types = [1, 2, 3, 6] # Exact Match, Contains, Regex, JSON Similarity + relevant_types = [ + 1, + 2, + 3, + 6, + ] # Exact Match, Contains, Regex, JSON Similarity elif category == 1: # LLM as Judge relevant_types = [4, 5] # Factuality, Custom elif category == 3: # Trajectory @@ -110,7 +121,10 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: console.info(f" {type_id}. 
{types[type_id]}") try: - eval_type = int(input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") or str(relevant_types[0])) + eval_type = int( + input(f"āž¤ Select Type ({', '.join(map(str, relevant_types))}): ") + or str(relevant_types[0]) + ) if eval_type not in relevant_types: eval_type = relevant_types[0] except (ValueError, IndexError): @@ -118,7 +132,9 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: # Target Output Key console.info("\nšŸ” Target Configuration") - console.info("Target Output Key determines which part of the output to evaluate") + console.info( + "Target Output Key determines which part of the output to evaluate" + ) console.info("Examples: '*' (all), 'result', 'answer', 'output'") target_key = input("āž¤ Target Output Key (default: '*'): ").strip() or "*" @@ -130,8 +146,8 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: "category": category, "type": eval_type, "targetOutputKey": target_key, - "createdAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z'), - "updatedAt": datetime.now(timezone.utc).isoformat().replace('+00:00', 'Z') + "createdAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), + "updatedAt": datetime.now(timezone.utc).isoformat().replace("+00:00", "Z"), } # LLM Configuration (if LLM as Judge) @@ -148,7 +164,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: "modelName": model_name, "prompt": prompt, "temperature": 0.0, - "maxTokens": 1000 + "maxTokens": 1000, } # Ensure evaluators directory exists @@ -160,7 +176,7 @@ def _create_evaluator_interactive(self: "InteractiveEvalCLI") -> None: file_path = evaluators_dir / filename try: - with open(file_path, 'w') as f: + with open(file_path, "w") as f: json.dump(evaluator, f, indent=2) console.success(f"\nāœ… Created evaluator: {filename}") @@ -205,7 +221,9 @@ def _show_evaluator_preview(self: "InteractiveEvalCLI", path: Path) -> None: except Exception: console.info(f" šŸ“„ {path.name} (error loading)") - def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path]) -> None: + def _show_evaluator_details( + self: "InteractiveEvalCLI", evaluator_tuple: tuple[str, Path] + ) -> None: """Show detailed evaluator view.""" name, path = evaluator_tuple self._clear_screen() @@ -219,17 +237,19 @@ def _show_evaluator_details(self: "InteractiveEvalCLI", evaluator_tuple: tuple[s console.info(f"\nšŸ“„ {path.name}") console.info(f"šŸ†” ID: {data.get('id', 'Unknown')}") console.info(f"šŸ“ Description: {data.get('description', 'No description')}") - console.info(f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}") + console.info( + f"šŸ·ļø Category: {self._get_category_name(data.get('category', 0))}" + ) console.info(f"šŸŽÆ Type: {self._get_type_name(data.get('type', 1))}") console.info(f"šŸ” Target Key: {data.get('targetOutputKey', '*')}") - if 'llmConfig' in data: - llm_config = data['llmConfig'] + if "llmConfig" in data: + llm_config = data["llmConfig"] console.info("\nšŸ¤– LLM Configuration:") console.info(f" Model: {llm_config.get('modelName', 'Unknown')}") - if 'prompt' in llm_config: - prompt_preview = llm_config['prompt'][:100] - if len(llm_config['prompt']) > 100: + if "prompt" in llm_config: + prompt_preview = llm_config["prompt"][:100] + if len(llm_config["prompt"]) > 100: prompt_preview += "..." 
console.info(f" Prompt: {prompt_preview}") @@ -245,7 +265,7 @@ def _get_category_name(self: "InteractiveEvalCLI", category: int) -> str: 0: "Deterministic", 1: "LLM as Judge", 2: "Agent Scorer", - 3: "Trajectory" + 3: "Trajectory", } return categories.get(category, "Unknown") @@ -259,7 +279,7 @@ def _get_type_name(self: "InteractiveEvalCLI", eval_type: int) -> str: 4: "Factuality", 5: "Custom", 6: "JSON Similarity", - 7: "Trajectory" + 7: "Trajectory", } return types.get(eval_type, "Unknown") diff --git a/src/uipath/_cli/_interactive/_execution.py b/src/uipath/_cli/_interactive/_execution.py index f2283bc2b..8152fb3a5 100644 --- a/src/uipath/_cli/_interactive/_execution.py +++ b/src/uipath/_cli/_interactive/_execution.py @@ -29,13 +29,19 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: # Build command - run from the project directory cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", + sys.executable, + "-m", + "uipath._cli.cli_eval", str(main_py.relative_to(self.project_root)), str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" + "--no-report", + "--workers", + "1", ] - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) try: # Run with real-time output from project directory @@ -46,7 +52,7 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: text=True, bufsize=1, universal_newlines=True, - cwd=self.project_root + cwd=self.project_root, ) # Stream output in real-time @@ -59,12 +65,16 @@ def _execute_evaluation(self: "InteractiveEvalCLI", eval_path: Path) -> None: if process.returncode == 0: console.success("\nāœ… Evaluation completed successfully!") else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + console.error( + f"\nāŒ Evaluation failed (exit code: {process.returncode})" + ) except Exception as e: console.error(f"Failed to run evaluation: {e}") - def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> None: + def _execute_evaluation_no_clear( + self: "InteractiveEvalCLI", eval_path: Path + ) -> None: """Execute evaluation without clearing screen.""" console.info("\nšŸš€ Running evaluation...") @@ -77,13 +87,19 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> # Build command - run from the project directory cmd = [ - sys.executable, "-m", "uipath._cli.cli_eval", + sys.executable, + "-m", + "uipath._cli.cli_eval", str(main_py.relative_to(self.project_root)), str(eval_path.relative_to(self.project_root)), - "--no-report", "--workers", "1" + "--no-report", + "--workers", + "1", ] - console.info(f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report") + console.info( + f"šŸ’» Command: uipath eval {main_py.name} {eval_path.name} --no-report" + ) try: # Run with real-time output from project directory @@ -94,7 +110,7 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> text=True, bufsize=1, universal_newlines=True, - cwd=self.project_root + cwd=self.project_root, ) # Stream output in real-time @@ -107,7 +123,9 @@ def _execute_evaluation_no_clear(self: "InteractiveEvalCLI", eval_path: Path) -> if process.returncode == 0: console.success("\nāœ… Evaluation completed successfully!") else: - console.error(f"\nāŒ Evaluation failed (exit code: {process.returncode})") + console.error( + f"\nāŒ Evaluation failed (exit code: 
{process.returncode})" + ) except Exception as e: console.error(f"Failed to run evaluation: {e}") @@ -132,4 +150,4 @@ def _find_main_py(self: "InteractiveEvalCLI") -> Optional[Path]: def _confirm(self: "InteractiveEvalCLI", prompt: str) -> bool: """Ask for confirmation.""" response = self._get_input(f"{prompt} (y/n): ").lower() - return response in ['y', 'yes'] + return response in ["y", "yes"] diff --git a/src/uipath/_cli/_interactive/_main.py b/src/uipath/_cli/_interactive/_main.py index c15afc2e0..c41f4023a 100644 --- a/src/uipath/_cli/_interactive/_main.py +++ b/src/uipath/_cli/_interactive/_main.py @@ -20,7 +20,7 @@ class InteractiveEvalCLI( EvalSetMixin, EvaluatorMixin, ExecutionMixin, - DrillDownMixin + DrillDownMixin, ): """Simple, fast, keyboard-driven evaluation CLI.""" @@ -35,7 +35,7 @@ def __init__(self, project_root: Optional[Path] = None): "⚔ Quick run (auto-select)", "āž• Create eval set", "āž• Create evaluator", - "šŸŽÆ Run specific combination" + "šŸŽÆ Run specific combination", ] self._discover_files() @@ -44,7 +44,9 @@ def run(self) -> None: self._show_ascii_art() if not HAS_NAVIGATION: - console.warning("āš ļø Terminal navigation not available. Using fallback mode.") + console.warning( + "āš ļø Terminal navigation not available. Using fallback mode." + ) console.info("Consider using a standard terminal for better experience.\n") self._run_fallback_mode() return @@ -63,11 +65,15 @@ def _run_navigation_mode(self) -> None: key = self._get_key_input() - if key == 'up': - self.current_selection = (self.current_selection - 1) % len(self.menu_items) - elif key == 'down': - self.current_selection = (self.current_selection + 1) % len(self.menu_items) - elif key in ['enter', ' ']: + if key == "up": + self.current_selection = (self.current_selection - 1) % len( + self.menu_items + ) + elif key == "down": + self.current_selection = (self.current_selection + 1) % len( + self.menu_items + ) + elif key in ["enter", " "]: self._execute_menu_item_with_navigation(self.current_selection) elif key.isdigit() and 1 <= int(key) <= 6: self._execute_menu_item_with_navigation(int(key) - 1) @@ -98,20 +104,20 @@ def _run_fallback_mode(self) -> None: try: choice = input("\nāž¤ Select option: ").strip() - if choice == '0': + if choice == "0": console.info("šŸ‘‹ Goodbye!") break - elif choice == '1': + elif choice == "1": self._list_eval_sets_navigation() - elif choice == '2': + elif choice == "2": self._list_evaluators() - elif choice == '3': + elif choice == "3": self._quick_run() - elif choice == '4': + elif choice == "4": self._create_eval_set_simple() - elif choice == '5': + elif choice == "5": self._create_evaluator_simple() - elif choice == '6': + elif choice == "6": self._run_specific_combination() else: console.warning("Invalid option") diff --git a/src/uipath/_cli/_interactive/_navigation.py b/src/uipath/_cli/_interactive/_navigation.py index 66514716b..4f8077ca0 100644 --- a/src/uipath/_cli/_interactive/_navigation.py +++ b/src/uipath/_cli/_interactive/_navigation.py @@ -45,33 +45,33 @@ def _get_key_input(self) -> str: char = sys.stdin.read(1) # Check for escape sequences (arrow keys) - if char == '\x1b': # ESC + if char == "\x1b": # ESC next_char = sys.stdin.read(1) - if next_char == '[': + if next_char == "[": arrow = sys.stdin.read(1) - if arrow == 'A': - return 'up' - elif arrow == 'B': - return 'down' - return '' + if arrow == "A": + return "up" + elif arrow == "B": + return "down" + return "" # Backspace handling - if char == '\x7f': # Backspace (DEL) - return 'back' - elif char == 
'\x08': # Backspace (BS) - return 'back' + if char == "\x7f": # Backspace (DEL) + return "back" + elif char == "\x08": # Backspace (BS) + return "back" # Enter key - if char in ['\r', '\n']: - return 'enter' + if char in ["\r", "\n"]: + return "enter" # Digit keys elif char.isdigit() and 1 <= int(char) <= 6: return char - elif char == '\x03': # Ctrl+C + elif char == "\x03": # Ctrl+C raise KeyboardInterrupt - return '' + return "" except Exception: return input("āž¤ ").strip().lower() finally: diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py index 546d6dd63..bfdcdbcde 100644 --- a/src/uipath/_cli/cli_eval.py +++ b/src/uipath/_cli/cli_eval.py @@ -68,7 +68,9 @@ def _display_local_results(results_data): result = evaluator_result.get("result", {}) score = result.get("score", 0.0) eval_time = result.get("evaluationTime", 0.0) - console.info(f" └─ {evaluator_name}: {score:.1f}% ({eval_time*1000:.2f}ms)") + console.info( + f" └─ {evaluator_name}: {score:.1f}% ({eval_time * 1000:.2f}ms)" + ) console.info(f"\nšŸŽÆ Summary: {int(passed_count)}/{total_count} tests passed") if overall_score == 100.0: @@ -140,6 +142,7 @@ def eval( if interactive: try: from ._interactive import launch_interactive_cli + launch_interactive_cli() return except ImportError as e:
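
A minimal usage sketch of the new entry point added by this series. It assumes that `_interactive/__init__.py` re-exports `launch_interactive_cli` (implied by the `from ._interactive import launch_interactive_cli` hunk in cli_eval.py but not shown in this diff), that the `interactive` parameter of `uipath eval` is exposed as an `--interactive` flag, and it uses the `samples/calculator` project from this patch purely for illustration:

    # Sketch only: launch the interactive eval builder programmatically,
    # equivalent to running `uipath eval --interactive` (flag name assumed)
    # from inside the project directory.
    from pathlib import Path

    # Import path assumed from the cli_eval.py hunk above; requires the
    # _interactive package to re-export launch_interactive_cli.
    from uipath._cli._interactive import launch_interactive_cli

    # Point it at an agent project that contains evaluationSets/ and
    # evaluators/ directories, e.g. the calculator sample in this patch.
    launch_interactive_cli(project_root=Path("samples/calculator"))

On a terminal with termios support this starts the arrow-key navigation mode; otherwise it falls back to the numbered text menu.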