Merge branch 'main' into release/revamped-evals

akshaylive · web-flow · commit 35fc809810b3 · 2025-10-23T11:55:05.000-07:00
diff --git a/src/uipath/_cli/__init__.py b/src/uipath/_cli/__init__.py
@@ -9,7 +9,7 @@
 from .cli_debug import debug as debug  # type: ignore
 from .cli_deploy import deploy as deploy  # type: ignore
 from .cli_dev import dev as dev
-from .cli_eval import eval as eval  # type: ignore
+from .cli_eval import eval as eval
 from .cli_init import init as init  # type: ignore
 from .cli_invoke import invoke as invoke  # type: ignore
 from .cli_new import new as new  # type: ignore
diff --git a/src/uipath/_cli/_debug/_bridge.py b/src/uipath/_cli/_debug/_bridge.py
@@ -434,7 +434,8 @@ def get_remote_debug_bridge(context: UiPathRuntimeContext) -> UiPathDebugBridge:
     if not context.trace_context:
         raise ValueError("trace_context is required for remote debugging")
 
-    signalr_url = uipath_url + "/agenthub_/wsstunnel?jobId=" + context.job_id
+    signalr_url = f"{uipath_url.rstrip('/')}/orchestrator_/signalr/robotdebug?sessionId={context.job_id}"
+
     return SignalRDebugBridge(
         hub_url=signalr_url,
         access_token=os.environ.get("UIPATH_ACCESS_TOKEN"),
diff --git a/src/uipath/_cli/_evals/__init__.py b/src/uipath/_cli/_evals/__init__.py
diff --git a/src/uipath/_cli/_evals/_evaluate.py b/src/uipath/_cli/_evals/_evaluate.py
@@ -0,0 +1,28 @@
+from typing import TypeVar
+
+from uipath._cli._evals._runtime import UiPathEvalContext, UiPathEvalRuntime
+from uipath._cli._runtime._contracts import (
+    UiPathBaseRuntime,
+    UiPathRuntimeContext,
+    UiPathRuntimeFactory,
+    UiPathRuntimeResult,
+)
+from uipath._events._event_bus import EventBus
+
+T = TypeVar("T", bound=UiPathBaseRuntime)
+C = TypeVar("C", bound=UiPathRuntimeContext)
+
+
+async def evaluate(
+    runtime_factory: UiPathRuntimeFactory[T, C],
+    eval_context: UiPathEvalContext,
+    event_bus: EventBus,
+) -> UiPathRuntimeResult:
+    async with UiPathEvalRuntime.from_eval_context(
+        factory=runtime_factory,
+        context=eval_context,
+        event_bus=event_bus,
+    ) as eval_runtime:
+        results = await eval_runtime.execute()
+        await event_bus.wait_for_all(timeout=10)
+        return results
diff --git a/src/uipath/_cli/_evals/_runtime.py b/src/uipath/_cli/_evals/_runtime.py
@@ -183,7 +183,7 @@ def from_eval_context(
     ) -> "UiPathEvalRuntime[T, C]":
         return cls(context, factory, event_bus)
 
-    async def execute(self) -> Optional[UiPathRuntimeResult]:
+    async def execute(self) -> UiPathRuntimeResult:
         if self.context.eval_set is None:
             raise ValueError("eval_set must be provided for evaluation runs")
 
diff --git a/src/uipath/_cli/cli_eval.py b/src/uipath/_cli/cli_eval.py
@@ -1,4 +1,3 @@
-# type: ignore
 import ast
 import asyncio
 import os
@@ -7,10 +6,10 @@
 import click
 
 from uipath._cli._evals._console_progress_reporter import ConsoleProgressReporter
+from uipath._cli._evals._evaluate import evaluate
 from uipath._cli._evals._progress_reporter import StudioWebProgressReporter
 from uipath._cli._evals._runtime import (
     UiPathEvalContext,
-    UiPathEvalRuntime,
 )
 from uipath._cli._runtime._runtime_factory import generate_runtime_factory
 from uipath._cli._utils._constants import UIPATH_PROJECT_ID
@@ -46,9 +45,9 @@ def setup_reporting_prereq(no_report: bool) -> bool:
         )
         return False
     if not os.getenv("UIPATH_FOLDER_KEY"):
-        os.environ["UIPATH_FOLDER_KEY"] = asyncio.run(
-            get_personal_workspace_key_async()
-        )
+        folder_key = asyncio.run(get_personal_workspace_key_async())
+        if folder_key:
+            os.environ["UIPATH_FOLDER_KEY"] = folder_key
     return True
 
 
@@ -145,17 +144,8 @@ def eval(
             runtime_factory = generate_runtime_factory()
             if eval_context.job_id:
                 runtime_factory.add_span_exporter(LlmOpsHttpExporter())
+            asyncio.run(evaluate(runtime_factory, eval_context, event_bus))
 
-            async def execute():
-                async with UiPathEvalRuntime.from_eval_context(
-                    factory=runtime_factory,
-                    context=eval_context,
-                    event_bus=event_bus,
-                ) as eval_runtime:
-                    await eval_runtime.execute()
-                    await event_bus.wait_for_all(timeout=10)
-
-            asyncio.run(execute())
         except Exception as e:
             console.error(
                 f"Error occurred: {e or 'Execution failed'}", include_traceback=True
diff --git a/tests/cli/eval/evals/eval-sets/default.json b/tests/cli/eval/evals/eval-sets/default.json
@@ -0,0 +1,24 @@
+{
+  "fileName": "default.json",
+  "id": "default-eval-set-id",
+  "name": "Basic Calculator Evaluation Set",
+  "batchSize": 10,
+  "evaluatorRefs": [
+    "equality"
+  ],
+  "evaluations": [
+    {
+      "id": "test-addition",
+      "name": "Test Addition",
+      "inputs": {"foo":  "bar"},
+      "expectedOutput": {"foo":  "bar"},
+      "expectedAgentBehavior": "",
+      "evalSetId": "default-eval-set-id",
+      "createdAt": "2025-09-04T18:54:58.378Z",
+      "updatedAt": "2025-09-04T18:55:55.416Z"
+    }
+  ],
+  "modelSettings": [],
+  "createdAt": "2025-09-04T18:54:58.379Z",
+  "updatedAt": "2025-09-04T18:55:55.416Z"
+}
diff --git a/tests/cli/eval/evals/evaluators/equality.json b/tests/cli/eval/evals/evaluators/equality.json
@@ -0,0 +1,11 @@
+{
+    "fileName": "equality.json",
+    "id": "equality",
+    "name": "Equality Evaluator",
+    "description": "An evaluator that judges the agent based on expected output.",
+    "category": 0,
+    "type": 1,
+    "targetOutputKey": "*",
+    "createdAt": "2025-06-26T17:45:39.651Z",
+    "updatedAt": "2025-06-26T17:45:39.651Z"
+}
diff --git a/tests/cli/eval/test_evaluate.py b/tests/cli/eval/test_evaluate.py
@@ -0,0 +1,41 @@
+from pathlib import Path
+from typing import Any
+
+from uipath._cli._evals._evaluate import evaluate
+from uipath._cli._evals._runtime import UiPathEvalContext
+from uipath._cli._runtime._contracts import UiPathRuntimeContext, UiPathRuntimeFactory
+from uipath._cli._runtime._runtime import UiPathRuntime
+from uipath._events._event_bus import EventBus
+
+
+async def test_evaluate():
+    # Arrange
+    event_bus = EventBus()
+    context = UiPathEvalContext(
+        eval_set=str(Path(__file__).parent / "evals" / "eval-sets" / "default.json")
+    )
+
+    async def identity(input: Any) -> Any:
+        return input
+
+    class MyFactory(UiPathRuntimeFactory[UiPathRuntime, UiPathRuntimeContext]):
+        def __init__(self):
+            super().__init__(
+                UiPathRuntime,
+                UiPathRuntimeContext,
+                runtime_generator=lambda context: UiPathRuntime(
+                    context, executor=identity
+                ),
+            )
+
+    # Act
+    result = await evaluate(MyFactory(), context, event_bus)
+
+    # Assert
+    assert result.output
+    assert (
+        result.output["evaluationSetResults"][0]["evaluationRunResults"][0]["result"][
+            "score"
+        ]
+        == 100.0
+    )
diff --git a/uv.lock b/uv.lock