115115 Optional [float ],
116116 Optional [int ],
117117 Optional [str ],
118+ int ,
118119]
119120
120121
@@ -356,6 +357,7 @@ def __init__(
356357 version : DeploymentVersion ,
357358 ingress : bool ,
358359 route_prefix : str ,
360+ rank : int ,
359361 ):
360362 self ._version = version
361363 self ._replica_id = replica_id
@@ -402,7 +404,7 @@ def __init__(
402404
403405 # Set metadata for logs and metrics.
404406 # servable_object will be populated in `initialize_and_get_metadata`.
405- self ._set_internal_replica_context (servable_object = None )
407+ self ._set_internal_replica_context (servable_object = None , rank = rank )
406408
407409 self ._metrics_manager = create_replica_metrics_manager (
408410 replica_id = replica_id ,
@@ -422,19 +424,27 @@ def get_num_ongoing_requests(self) -> int:
422424 return self ._metrics_manager .get_num_ongoing_requests ()
423425
424426 def get_metadata (self ) -> ReplicaMetadata :
427+ current_rank = ray .serve .context ._get_internal_replica_context ().rank
425428 return (
426429 self ._version .deployment_config ,
427430 self ._version ,
428431 self ._initialization_latency ,
429432 self ._port ,
430433 self ._docs_path ,
434+ current_rank ,
431435 )
432436
433- def _set_internal_replica_context (self , * , servable_object : Callable = None ):
437+ def _set_internal_replica_context (
438+ self , * , servable_object : Callable = None , rank : int = None
439+ ):
440+ # Calculate world_size from deployment config instead of storing it
441+ world_size = self ._deployment_config .num_replicas
434442 ray .serve .context ._set_internal_replica_context (
435443 replica_id = self ._replica_id ,
436444 servable_object = servable_object ,
437445 _deployment_config = self ._deployment_config ,
446+ rank = rank ,
447+ world_size = world_size ,
438448 )
439449
440450 def _configure_logger_and_profilers (
@@ -752,7 +762,10 @@ async def initialize(self, deployment_config: DeploymentConfig):
752762 raise RuntimeError (traceback .format_exc ()) from None
753763
754764 async def reconfigure (
755- self , deployment_config : DeploymentConfig , route_prefix : Optional [str ] = None
765+ self ,
766+ deployment_config : DeploymentConfig ,
767+ rank : int ,
768+ route_prefix : Optional [str ] = None ,
756769 ):
757770 try :
758771 user_config_changed = (
@@ -782,9 +795,10 @@ async def reconfigure(
782795 )
783796
784797 # We need to update internal replica context to reflect the new
785- # deployment_config.
798+ # deployment_config and rank .
786799 self ._set_internal_replica_context (
787- servable_object = self ._user_callable_wrapper .user_callable
800+ servable_object = self ._user_callable_wrapper .user_callable ,
801+ rank = rank ,
788802 )
789803
790804 self ._route_prefix = self ._version .route_prefix
@@ -894,8 +908,11 @@ async def record_routing_stats(self) -> Dict[str, Any]:
894908
895909class Replica (ReplicaBase ):
896910 async def _on_initialized (self ):
911+ # Get current rank from replica context during initialization
912+ current_rank = ray .serve .context ._get_internal_replica_context ().rank
897913 self ._set_internal_replica_context (
898- servable_object = self ._user_callable_wrapper .user_callable
914+ servable_object = self ._user_callable_wrapper .user_callable ,
915+ rank = current_rank ,
899916 )
900917
901918 # Save the initialization latency if the replica is initializing
@@ -969,6 +986,7 @@ async def __init__(
969986 version : DeploymentVersion ,
970987 ingress : bool ,
971988 route_prefix : str ,
989+ rank : int ,
972990 ):
973991 deployment_config = DeploymentConfig .from_proto_bytes (
974992 deployment_config_proto_bytes
@@ -985,6 +1003,7 @@ async def __init__(
9851003 version = version ,
9861004 ingress = ingress ,
9871005 route_prefix = route_prefix ,
1006+ rank = rank ,
9881007 )
9891008
9901009 def push_proxy_handle (self , handle : ActorHandle ):
@@ -1047,9 +1066,9 @@ async def record_routing_stats(self) -> Dict[str, Any]:
10471066 return await self ._replica_impl .record_routing_stats ()
10481067
10491068 async def reconfigure (
1050- self , deployment_config , route_prefix : Optional [str ] = None
1069+ self , deployment_config , rank : int , route_prefix : Optional [str ] = None
10511070 ) -> ReplicaMetadata :
1052- await self ._replica_impl .reconfigure (deployment_config , route_prefix )
1071+ await self ._replica_impl .reconfigure (deployment_config , rank , route_prefix )
10531072 return self ._replica_impl .get_metadata ()
10541073
10551074 def _preprocess_request_args (
0 commit comments