Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,10 @@ no_implicit_reexport = true

disallow_untyped_defs = false

[[tool.mypy.overrides]]
module = "uipath._cli._interactive.*"
disable_error_code = ["misc", "unused-ignore"]

[tool.pydantic-mypy]
init_forbid_extra = true
init_typed = true
Expand Down
118 changes: 118 additions & 0 deletions samples/calculator/evaluationSets/comprehensive_calculator_tests.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
{
"id": "calc-comprehensive-001",
"fileName": "comprehensive_eval_set.json",
"evaluatorRefs": ["exact-match-eval", "json-similarity-eval"],
"name": "Comprehensive Calculator Tests",
"batchSize": 10,
"timeoutMinutes": 10,
"modelSettings": [],
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z",
"evaluations": [
{
"id": "add-basic",
"name": "Basic Addition",
"inputs": {
"a": 5,
"b": 3,
"operator": "+"
},
"expectedOutput": {
"result": 8.0
},
"expectedAgentBehavior": "Add two positive numbers",
"simulationInstructions": "",
"simulateInput": false,
"inputGenerationInstructions": "",
"simulateTools": false,
"toolsToSimulate": [],
"evalSetId": "calc-comprehensive-001",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
},
{
"id": "sub-basic",
"name": "Basic Subtraction",
"inputs": {
"a": 10,
"b": 4,
"operator": "-"
},
"expectedOutput": {
"result": 6.0
},
"expectedAgentBehavior": "Subtract smaller from larger",
"simulationInstructions": "",
"simulateInput": false,
"inputGenerationInstructions": "",
"simulateTools": false,
"toolsToSimulate": [],
"evalSetId": "calc-comprehensive-001",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
},
{
"id": "mul-basic",
"name": "Basic Multiplication",
"inputs": {
"a": 7,
"b": 6,
"operator": "*"
},
"expectedOutput": {
"result": 42.0
},
"expectedAgentBehavior": "Multiply two integers",
"simulationInstructions": "",
"simulateInput": false,
"inputGenerationInstructions": "",
"simulateTools": false,
"toolsToSimulate": [],
"evalSetId": "calc-comprehensive-001",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
},
{
"id": "div-basic",
"name": "Basic Division",
"inputs": {
"a": 15,
"b": 3,
"operator": "/"
},
"expectedOutput": {
"result": 5.0
},
"expectedAgentBehavior": "Divide evenly",
"simulationInstructions": "",
"simulateInput": false,
"inputGenerationInstructions": "",
"simulateTools": false,
"toolsToSimulate": [],
"evalSetId": "calc-comprehensive-001",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
},
{
"id": "div-zero",
"name": "Division by Zero",
"inputs": {
"a": 10,
"b": 0,
"operator": "/"
},
"expectedOutput": {
"result": 0.0
},
"expectedAgentBehavior": "Handle division by zero",
"simulationInstructions": "",
"simulateInput": false,
"inputGenerationInstructions": "",
"simulateTools": false,
"toolsToSimulate": [],
"evalSetId": "calc-comprehensive-001",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
}
]
}
10 changes: 10 additions & 0 deletions samples/calculator/evaluators/exact_match.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"id": "exact-match-eval",
"name": "Exact Match Evaluator",
"description": "Tests for exact output matches",
"category": 0,
"type": 1,
"targetOutputKey": "*",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
}
10 changes: 10 additions & 0 deletions samples/calculator/evaluators/json_similarity.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
{
"id": "json-similarity-eval",
"name": "JSON Similarity Evaluator",
"description": "Tests for structural JSON similarity with tolerance",
"category": 0,
"type": 6,
"targetOutputKey": "*",
"createdAt": "2025-01-25T00:00:00Z",
"updatedAt": "2025-01-25T00:00:00Z"
}
5 changes: 5 additions & 0 deletions src/uipath/_cli/_interactive/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
"""Interactive evaluation CLI module."""

# Re-export the entry point from the private ``_main`` module so callers can
# import it directly from this package.
from ._main import launch_interactive_cli

# Restrict ``from <package> import *`` to the single public entry point.
__all__ = ["launch_interactive_cli"]
48 changes: 48 additions & 0 deletions src/uipath/_cli/_interactive/_discovery.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""Discovery utilities for finding eval sets and evaluators."""
# type: ignore

import json
from typing import TYPE_CHECKING

if TYPE_CHECKING:
from ._main import InteractiveEvalCLI


class DiscoveryMixin:
    """Mixin for file discovery operations.

    The host object must provide ``project_root`` (a path supporting ``/``
    and ``glob``) and two mutable lists, ``eval_sets`` and ``evaluators``,
    which are refilled in place with ``(display_name, file_path)`` tuples.
    """

    @staticmethod
    def _load_json_object(path) -> "dict | None":
        """Return the JSON object stored at *path*, or ``None`` if unusable.

        ``None`` is returned when the file cannot be read, is not valid
        UTF-8/JSON, or its top-level value is not a JSON object. Discovery is
        deliberately best-effort: one broken file must not abort the scan.
        """
        try:
            # JSON is UTF-8 by specification; do not depend on the locale.
            with open(path, encoding="utf-8") as f:
                data = json.load(f)
        except (OSError, ValueError):
            # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
            return None
        return data if isinstance(data, dict) else None

    def _discover_files(self: "InteractiveEvalCLI") -> None:
        """Quickly discover eval sets and evaluators.

        Scans ``<project_root>/evaluationSets`` and
        ``<project_root>/evaluators`` for ``*.json`` files. A file counts as
        an eval set when it has an ``"evaluations"`` list, and as an evaluator
        when it has both ``"id"`` and ``"type"`` keys. The display name is the
        file's ``"name"`` value, falling back to the file stem.
        """
        # Clear existing lists to avoid duplicates
        self.eval_sets.clear()
        self.evaluators.clear()

        # Find eval sets from evaluationSets folder
        eval_sets_dir = self.project_root / "evaluationSets"
        if eval_sets_dir.exists():
            # Sorted so the listing order is stable across runs and platforms.
            for eval_file in sorted(eval_sets_dir.glob("*.json")):
                data = DiscoveryMixin._load_json_object(eval_file)
                if data is None:
                    continue
                # Check if it's an eval set by presence of "evaluations" array
                if isinstance(data.get("evaluations"), list):
                    name = data.get("name", eval_file.stem)
                    self.eval_sets.append((name, eval_file))

        # Find evaluators from evaluators folder
        evaluators_dir = self.project_root / "evaluators"
        if evaluators_dir.exists():
            for eval_file in sorted(evaluators_dir.glob("*.json")):
                data = DiscoveryMixin._load_json_object(eval_file)
                if data is None:
                    continue
                # Verify it has evaluator-specific fields
                if "id" in data and "type" in data:
                    name = data.get("name", eval_file.stem)
                    self.evaluators.append((name, eval_file))
92 changes: 92 additions & 0 deletions src/uipath/_cli/_interactive/_drill_down.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
"""Drill-down navigation for eval sets and evaluators."""
# type: ignore

from typing import TYPE_CHECKING

from .._utils._console import ConsoleLogger

if TYPE_CHECKING:
from ._main import InteractiveEvalCLI

# Module-level logger instance used by the mixin methods below for screen output.
console = ConsoleLogger()


class DrillDownMixin:
    """Mixin for drill-down navigation operations.

    The host object must provide the ``eval_sets`` and ``evaluators`` lists
    of ``(name, path)`` tuples, plus the ``_clear_screen``, ``_get_key_input``,
    ``_get_input`` and ``_show_*_preview`` / ``_show_*_details`` methods that
    are called below.
    """

    @staticmethod
    def _move_selection(key: str, current: int, count: int) -> int:
        """Return the selection index that results from pressing *key*.

        Args:
            key: Key token as returned by ``_get_key_input``.
            current: Currently highlighted zero-based index.
            count: Number of selectable items (must be positive).

        Returns:
            The new zero-based index. ``"up"`` / ``"down"`` wrap around, a
            decimal number in ``1..count`` jumps straight to that item, and
            any other key leaves the selection unchanged.
        """
        if key == "up":
            return (current - 1) % count
        if key == "down":
            return (current + 1) % count
        # isdecimal(), not isdigit(): isdigit() also accepts characters such
        # as "²" that int() rejects, which would raise ValueError here.
        if key.isdecimal() and 1 <= int(key) <= count:
            return int(key) - 1
        return current

    def _drill_down_eval_sets(self: "InteractiveEvalCLI") -> None:
        """Drill down into eval sets with navigation.

        Redraws the list on every key press, previews the highlighted eval
        set, and opens its details on Enter/Space. Returns when the user
        presses ``q``/``Q`` or the key input reports ``"back"``.
        """
        if not self.eval_sets:
            self._show_no_items_screen("eval sets")
            return

        current_selection = 0
        while True:
            self._clear_screen()
            console.info("📋 Eval Sets - Navigate & Select")
            console.info(
                "⌨️ Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back"
            )
            console.info("─" * 65)

            for i, (name, path) in enumerate(self.eval_sets):
                if i == current_selection:
                    console.info(f"► {i + 1}. {name} ◄")
                    self._show_eval_set_preview(path)
                else:
                    console.info(f" {i + 1}. {name}")

            key = self._get_key_input()

            if key in ("q", "Q", "back"):
                break
            if key in ("enter", " "):
                self._show_eval_set_details(self.eval_sets[current_selection])
            else:
                current_selection = DrillDownMixin._move_selection(
                    key, current_selection, len(self.eval_sets)
                )

    def _drill_down_evaluators(self: "InteractiveEvalCLI") -> None:
        """Drill down into evaluators with navigation.

        Redraws the list on every key press, previews the highlighted
        evaluator, and opens its details on Enter/Space. Returns when the
        user presses ``q``/``Q`` or the key input reports ``"back"``.
        """
        if not self.evaluators:
            self._show_no_items_screen("evaluators")
            return

        current_selection = 0
        while True:
            self._clear_screen()
            console.info("⚙️ Evaluators - Navigate & Select")
            console.info(
                "⌨️ Navigation: ↑↓ to navigate, Enter for details, q/Backspace to go back"
            )
            console.info("─" * 65)

            for i, (name, path) in enumerate(self.evaluators):
                if i == current_selection:
                    console.info(f"► {i + 1}. {name} ◄")
                    self._show_evaluator_preview(path)
                else:
                    console.info(f" {i + 1}. {name}")

            key = self._get_key_input()

            if key in ("q", "Q", "back"):
                break
            if key in ("enter", " "):
                self._show_evaluator_details(self.evaluators[current_selection])
            else:
                current_selection = DrillDownMixin._move_selection(
                    key, current_selection, len(self.evaluators)
                )

    def _show_no_items_screen(self: "InteractiveEvalCLI", item_type: str) -> None:
        """Show no items screen.

        Args:
            item_type: Plural label inserted into the warning message,
                e.g. ``"eval sets"``.
        """
        self._clear_screen()
        console.warning(f"No {item_type} found!")
        console.info("Press Enter to go back...")
        # Block until the user confirms; the entered text is not used.
        self._get_input("")
Loading