chore: speed up health check by running per-LLM checks concurrently

ducphamle2 · ducphamle2 · commit 811b8b438388 · 2025-06-09T10:31:48.000-07:00
diff --git a/openhands/agenthub/codeact_agent/codeact_agent.py b/openhands/agenthub/codeact_agent/codeact_agent.py
@@ -1,3 +1,4 @@
+import asyncio
 import json
 import os
 from collections import deque
@@ -571,7 +572,10 @@ async def _get_available_llms_from_health_check(
         if perform_health_check_fn is None:
             perform_health_check_fn = perform_health_check
         models_rate_limit: dict[str, tuple[int, int, float]] = {}
-        for name, llm in self.routing_llms.items():
+
+        async def check_llm(
+            name: str, llm: LLM
+        ) -> tuple[str, tuple[int, int, float]] | None:
             (remaining_requests, remaining_tokens) = await perform_health_check_fn(
                 {
                     'model': llm.config.model,
@@ -580,11 +584,20 @@ async def _get_available_llms_from_health_check(
                 }
             )
             if remaining_requests is not None and remaining_tokens is not None:
-                models_rate_limit[name] = (
-                    remaining_requests,
-                    remaining_tokens,
-                    llm.config.weight,
-                )
+                return name, (remaining_requests, remaining_tokens, llm.config.weight)
+            return None
+
+        tasks = [check_llm(name, llm) for name, llm in self.routing_llms.items()]
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        models_rate_limit = {
+            name: data
+            for result in results
+            if result is not None
+            and not isinstance(result, Exception)
+            and not isinstance(result, BaseException)
+            for name, data in [result]
+        }
         return models_rate_limit
 
     def _select_llm_from_weights(
diff --git a/tests/unit/test_agent_delegation.py b/tests/unit/test_agent_delegation.py
@@ -225,6 +225,9 @@ async def mock_react_to_exception(*args, **kwargs):
         dummy_message._source = EventSource.USER
         await parent_controller._on_event(dummy_message)
 
+        # Give time for the async cleanup to complete
+        await asyncio.sleep(0.1)
+
         # Verify parent is cleaned up
         assert (
             parent_controller.delegate is None