1+ import asyncio
12import json
23import os
34from collections import deque
@@ -571,7 +572,10 @@ async def _get_available_llms_from_health_check(
571572 if perform_health_check_fn is None :
572573 perform_health_check_fn = perform_health_check
573574 models_rate_limit : dict [str , tuple [int , int , float ]] = {}
574- for name , llm in self .routing_llms .items ():
575+
576+ async def check_llm (
577+ name : str , llm : LLM
578+ ) -> tuple [str , tuple [int , int , float ]] | None :
575579 (remaining_requests , remaining_tokens ) = await perform_health_check_fn (
576580 {
577581 'model' : llm .config .model ,
@@ -580,11 +584,20 @@ async def _get_available_llms_from_health_check(
580584 }
581585 )
582586 if remaining_requests is not None and remaining_tokens is not None :
583- models_rate_limit [name ] = (
584- remaining_requests ,
585- remaining_tokens ,
586- llm .config .weight ,
587- )
587+ return name , (remaining_requests , remaining_tokens , llm .config .weight )
588+ return None
589+
590+ tasks = [check_llm (name , llm ) for name , llm in self .routing_llms .items ()]
591+ results = await asyncio .gather (* tasks , return_exceptions = True )
592+
593+ models_rate_limit = {
594+ name : data
595+ for result in results
596+ if result is not None
597+ and not isinstance (result , Exception )
598+ and not isinstance (result , BaseException )
599+ for name , data in [result ]
600+ }
588601 return models_rate_limit
589602
590603 def _select_llm_from_weights (
0 commit comments