@@ -1,10 +1,12 @@
+import asyncio
 import json
 import os
 from collections import deque
 from copy import deepcopy
 from datetime import datetime
 from typing import override
 
+import litellm
 from httpx import request
 
 import openhands.agenthub.codeact_agent.function_calling as codeact_function_calling
@@ -22,6 +24,7 @@
     AgentFinishAction,
 )
 from openhands.events.event import Event
+from openhands.llm.health_check import perform_health_check
 from openhands.llm.llm import LLM
 from openhands.memory.condenser import Condenser
 from openhands.memory.condenser.condenser import Condensation, View
@@ -317,11 +320,11 @@ def step(self, state: State) -> Action:
             complexity_score = res['outputs'][0]['data'][0]
             logger.debug(f'Complexity score: {complexity_score}')
             if complexity_score > 0.3:
-                response = self.llm.completion(**params)
+                response = self._completion_with_failover(**params)
             else:
                 response = self.routing_llms['simple'].completion(**params)
         else:
-            response = self.llm.completion(**params)
+            response = self._completion_with_failover(**params)
 
         logger.debug(f'Response from LLM: {response}')
 
@@ -449,3 +452,197 @@ def _enhance_messages(self, messages: list[Message]) -> list[Message]:
             prev_role = msg.role
 
         return results
+
+    def _completion_with_failover(self, **params):
+        # First try the default LLM
+        try:
+            return self.llm.completion(**params)
+        except (litellm.RateLimitError, litellm.InternalServerError) as e:
+            logger.error(
+                f'Error completing with default LLM: {e}. Trying routing LLMs.'
+            )
+            last_exception = e
+        except Exception as e:
+            logger.error(
+                f'Error completing with default LLM: {e}. Not a rate limit or server error, so re-raising.'
+            )
+            raise
+
+        # If no routing LLMs are available, raise the original exception
+        if not self.routing_llms:
+            error_msg = (
+                'Default LLM failed and no routing LLMs are available. Try again later.'
+            )
+            logger.error(error_msg)
+            raise Exception(error_msg) from last_exception
+
+        # Sort routing LLMs by weight in descending order
+        sorted_llms = sorted(
+            self.routing_llms.items(), key=lambda x: x[1].config.weight, reverse=True
+        )
+
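+        # Walk the alternates in weight order; 'simple' is deferred as the final fallback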
+        has_simple_llm = False
+        for name, llm in sorted_llms:
+            # Skip the current default LLM (compared by config), since it has already been tried
+            if (
+                llm.config.model == self.llm.config.model
+                and llm.config.api_key == self.llm.config.api_key
+                and llm.config.base_url == self.llm.config.base_url
+            ):
+                continue
+            # Defer the 'simple' LLM; it is only tried as the last resort below
+            if name == 'simple':
+                has_simple_llm = True
+                continue
+            try:
+                resp = llm.completion(**params)
+                # If successful, promote this LLM to be the new default
+                self.llm = llm
+                return resp
+            except Exception as e:
+                logger.error(f'Error completing with {name}: {e}. Trying next LLM.')
+                last_exception = e
+
+        if has_simple_llm:
+            try:
+                # Do not promote the 'simple' LLM to default, since its model quality is lower
+                return self.routing_llms['simple'].completion(**params)
+            except Exception as e:
+                logger.error(f"Error completing with 'simple' LLM: {e}.")
+                last_exception = e
+        # If we get here, all LLMs failed
+        error_msg = 'No LLM is currently available to process this prompt. Try again later.'
+        logger.error(error_msg)
+        raise Exception(error_msg) from last_exception
+
+    @override
+    async def select_llm_from_weight_and_availability(self):
+        try:
+            self.llm = await self._select_llm_from_weight_and_availability()
+            logger.info(f'Selected LLM: {self.llm.config.model}')
+        except Exception as e:
+            logger.warning(
+                f'Error selecting LLM from weight and availability: {e}. Using the default LLM.'
+            )
+
+    async def _select_llm_from_weight_and_availability(
+        self, perform_health_check_fn=None, now_fn=None
+    ) -> LLM:
+        """
+        Select an LLM from the routing LLMs based on weight and availability, using timestamp-based weighted selection.
+
+        Args:
+            perform_health_check_fn (callable, optional): Function to perform health checks (injectable for testing)
+            now_fn (callable, optional): Function returning the current datetime (injectable for testing)
+        Returns:
+            LLM: The selected LLM instance
+        Raises:
+            ValueError: If no available LLMs are found
+        """
+
+        if not self.routing_llms:
+            raise ValueError('No LLMs available for routing')
+
+        # Get available LLMs from health check
+        models_rate_limit = await self._get_available_llms_from_health_check(
+            perform_health_check_fn
+        )
+        if not models_rate_limit:
+            raise ValueError('No available LLMs found')
+
+        # Select LLM based on weights
+        selected_name = self._select_llm_from_weights(models_rate_limit, now_fn)
+        return self.routing_llms[selected_name]
+
+    async def _get_available_llms_from_health_check(
+        self, perform_health_check_fn=None
+    ) -> dict[str, tuple[int, int, float]]:
+        """
+        Get available LLMs by performing health checks concurrently.
+
+        Args:
+            perform_health_check_fn: Function to perform health checks (defaults to perform_health_check)
+
+        Returns:
+            dict[str, tuple[int, int, float]]: Mapping of LLM name to (remaining requests, remaining tokens, weight)
+
+        Raises:
+            ValueError: If no routing LLMs are configured
+        """
+        if not self.routing_llms:
+            raise ValueError('No LLMs available for routing')
+        if perform_health_check_fn is None:
+            perform_health_check_fn = perform_health_check
+        models_rate_limit: dict[str, tuple[int, int, float]] = {}
+
+        async def check_llm(
+            name: str, llm: LLM
+        ) -> tuple[str, tuple[int, int, float]] | None:
+            (remaining_requests, remaining_tokens) = await perform_health_check_fn(
+                {
+                    'model': llm.config.model,
+                    'api_key': llm.config.api_key,
+                    'base_url': llm.config.base_url,
+                }
+            )
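+            # Models whose health check reports no remaining-quota values are treated as unavailable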
+            if remaining_requests is not None and remaining_tokens is not None:
+                return name, (remaining_requests, remaining_tokens, llm.config.weight)
+            return None
+
+        tasks = [check_llm(name, llm) for name, llm in self.routing_llms.items()]
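+        # return_exceptions=True turns failed probes into exception objects instead of cancelling the batch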
+        results = await asyncio.gather(*tasks, return_exceptions=True)
+
+        for result in results:
+            # Skip failed probes (returned as exception objects) and unavailable models
+            if result is None or isinstance(result, BaseException):
+                continue
+            name, data = result
+            models_rate_limit[name] = data
+        return models_rate_limit
+
+    def _select_llm_from_weights(
+        self, models_rate_limit: dict[str, tuple[int, int, float]], now_fn=None
+    ) -> str:
+        """
+        Select an LLM based on weights, using a timestamp-based index into a weighted pool.
+
+        Args:
+            models_rate_limit: Mapping of LLM name to (remaining requests, remaining tokens, weight)
+            now_fn: Function returning the current datetime
+
+        Returns:
+            str: Name of the selected LLM
+
+        Raises:
+            ValueError: If no available LLMs are found or the total weight is 0
+        """
+        if now_fn is None:
+            now_fn = datetime.now
+        # Calculate the total weight in a single pass
+        total_weight = 0.0
+        weights_by_name = {}
+        for name, (_, _, weight) in models_rate_limit.items():
+            total_weight += weight
+            weights_by_name[name] = weight
+
+        if total_weight <= 0:
+            raise ValueError('No available LLMs found')
+
+        # Build the weighted selection pool
+        selection_pool = []
+        for name, weight in weights_by_name.items():
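+            # Quantize to integer slot counts; weights under 1% of the total round down to zero slots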
+            count = int((weight / total_weight) * 100)
+            if count > 0:
+                selection_pool.extend([name] * count)
+
+        if not selection_pool:
+            # Fall back to equal weights if quantization produced no slots
+            selection_pool = list(models_rate_limit.keys())
+
+        # Use the current timestamp for a deterministic but time-varying selection
+        current_time = int(now_fn().timestamp())
+        # Select the LLM using the timestamp-based index
+        selected_index = current_time % len(selection_pool)
+        return selection_pool[selected_index]
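
The selection above quantizes each weight into a 100-slot pool and indexes it with the current Unix timestamp, so the choice is deterministic within any given second but rotates across the pool over time. A minimal standalone sketch of the same idea (the names and weights are illustrative, not part of this patch):

    from datetime import datetime

    def select_by_weight(weights: dict[str, float], now_fn=datetime.now) -> str:
        total = sum(weights.values())
        if total <= 0:
            raise ValueError('No available LLMs found')
        # Each name receives floor(weight / total * 100) slots in the pool
        pool: list[str] = []
        for name, weight in weights.items():
            pool.extend([name] * int(weight / total * 100))
        if not pool:
            pool = list(weights)  # fall back to equal weighting
        return pool[int(now_fn().timestamp()) % len(pool)]

    # With weights 0.7 and 0.3, 'primary' occupies 70 of the 100 slots
    print(select_by_weight({'primary': 0.7, 'secondary': 0.3}))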
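Because perform_health_check_fn and now_fn are injectable, the selection logic can be exercised without network access. The health-check fan-out relies on asyncio.gather(..., return_exceptions=True), which hands back failed probes as exception objects instead of aborting the whole batch. A self-contained sketch of that pattern (probe is a stand-in, not the real perform_health_check):

    import asyncio

    async def probe(name: str) -> tuple[str, int]:
        if name == 'flaky':
            raise RuntimeError('health check failed')
        return name, 100  # pretend this provider has quota remaining

    async def main() -> None:
        results = await asyncio.gather(
            *(probe(n) for n in ('primary', 'flaky', 'simple')),
            return_exceptions=True,
        )
        # Failed probes arrive as exception objects and are filtered out
        healthy = dict(r for r in results if not isinstance(r, BaseException))
        print(healthy)  # {'primary': 100, 'simple': 100}

    asyncio.run(main())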