o
    `۷i,-                     @   s  U d Z ddlZddlZddlZddlmZ ddlmZ ddlm	Z	m
Z
mZmZ ddlZddlmZ ddlmZ ddlmZmZ dd	lmZ dd
lmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$ e%eZ&da'de(d< da)ee(d< e$eG dd dZ*	dQde+de+dee fddZ,dd Z-dd Z.ddd ed!e	d"ed#e"d$e/d%ee	e0e0gdf  fd&d'Z1dRde+defd(d)Z2edd*G d+d, d,Z3ej4d-dd.Z5ej4d/dd.Z6d0d1 Z7d2d3 Z8	4	4	4	4	4dSd5e0d6e0d7e0d8e0d9e0f
d:d;Z9d<d= Z:d>ee3 fd?d@Z;ee<Z=e
e0e
e0ej>f f e(dA< dBe0de
e0ej>f fdCdDZ?dBe0dEe0fdFdGZ@dBe0dEe0fdHdIZAee<ZBe
e0e
e0ef f e(dJ< dKdL ZCdMdN ZDdOdP ZEdS )Tz
This file stores global state for a Serve application. Deployment replicas
can use this state to access metadata or the Serve controller.
    N)defaultdict)	dataclass)CallableDictListOptional)RayActorError)ServeControllerClient)DeploymentID	ReplicaID)DeploymentConfig)SERVE_CONTROLLER_NAMESERVE_LOGGER_NAMESERVE_NAMESPACE)ReplicaResult)RayServeException)RayServegRPCContext)ReplicaRank)DeveloperAPIReplicaContext_INTERNAL_REPLICA_CONTEXT_global_clientc                   @   s   e Zd ZU dZeed< eed< eed< eed< e	ed< dZ
eeegdf  ed< ed	efd
dZed	efddZed	efddZdS )r   a  Stores runtime context info for replicas.

    Fields:
        - app_name: name of the application the replica is a part of.
        - deployment: name of the deployment the replica is a part of.
        - replica_tag: unique ID for the replica.
        - servable_object: instance of the user class/function this replica is running.
        - rank: the rank of the replica.
        - world_size: the number of replicas in the deployment.
    
replica_idservable_object_deployment_configrank
world_sizeN_handle_registration_callbackreturnc                 C   
   | j jjS N)r   deployment_idapp_nameself r%   G/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/serve/context.pyr"   8      
zReplicaContext.app_namec                 C   r   r    )r   r!   namer#   r%   r%   r&   
deployment<   r'   zReplicaContext.deploymentc                 C   s   | j jS r    )r   	unique_idr#   r%   r%   r&   replica_tag@      zReplicaContext.replica_tag)__name__
__module____qualname____doc__r   __annotations__r   r   r   intr   r   r
   propertystrr"   r)   r+   r%   r%   r%   r&   r   #   s   
 FT_health_check_controllerraise_if_no_controller_runningr   c                 C   sZ   zt dur| rtt jj  t W S W t
|S  ty,   td t	d Y t
|S w )a  Gets the global client, which stores the controller's handle.

    Args:
        _health_check_controller: If True, run a health check on the
            cached controller if it exists. If the check fails, try reconnecting
            to the controller.
        raise_if_no_controller_running: Whether to raise an exception if
            there is no currently running Serve controller.

    Returns:
        ServeControllerClient to the running Serve controller. If there
        is no running controller and raise_if_no_controller_running is
        set to False, returns None.

    Raises:
        RayServeException: If there is no running Serve controller actor
            and raise_if_no_controller_running is set to True.
    Nz-The cached controller has died. Reconnecting.)r   rayget_controllercheck_aliveremoter   loggerinfo_set_global_client_connect)r5   r6   r%   r%   r&   _get_global_clientE   s   

r@   c                 C   s   | a d S r    )r   )clientr%   r%   r&   r>   g   r,   r>   c                   C   s   t S r    )r   r%   r%   r%   r&   _get_internal_replica_contextl   s   rB   )handle_registration_callbackr   r   r   r   r   rC   c                 C   s   t | |||||dad S )N)r   r   r   r   r   r   )r   r   )r   r   r   r   r   rC   r%   r%   r&   _set_internal_replica_contextp   s   

rD   c                 C   sh   dt jjj_t  st jtd z	t jt	td}W n t
y)   | r&tdY dS w t|}t| |S )aO  Connect to an existing Serve application on this Ray cluster.

    If called from within a replica, this will connect to the same Serve
    app that the replica is running in.

    Returns:
        ServeControllerClient that encapsulates a Ray actor handle to the
        existing Serve application's Serve Controller. None if there is
        no running Serve controller actor and raise_if_no_controller_running
        is set to False.

    Raises:
        RayServeException: If there is no running Serve controller actor
            and raise_if_no_controller_running is set to True.
    F)	namespacez7There is no Serve instance running on this Ray cluster.N)r7   _privateworkerglobal_worker_filter_logs_by_jobis_initializedinitr   	get_actorr   
ValueErrorr   r	   r>   )r6   
controllerrA   r%   r%   r&   r?      s"   r?   )frozenc                   @   s   e Zd ZU dZeed< dZeed< dZeed< dZeed< dZ	eed< dZ
ee ed< d	Zeed
< d	Zeed< dZee ed< dS )_RequestContext route
request_id_internal_request_idr"   multiplexed_model_idNgrpc_contextFis_http_requestcancel_on_parent_request_cancel_ray_trace_ctx)r-   r.   r/   rR   r4   r1   rS   rT   r"   rU   rV   r   r   rW   boolrX   rY   dictr%   r%   r%   r&   rP      s   
 rP   z'Serve internal request context variable)defaultz0Serve internal batching request context variablec                   C   s    t  du rt t  t  S )zWGet the current request context.

    Returns:
        The current request context
    N)_serve_request_contextr8   setrP   r%   r%   r%   r&   _get_serve_request_context   s   r_   c                   C   s   t  du rt g  t  S )z7Get the list of request contexts for the current batch.N)_serve_batch_request_contextr8   r^   r%   r%   r%   r&    _get_serve_batch_request_context   s   
ra   rQ   rR   rS   rT   r"   rU   c              	   C   s@   t  }tt| p
|j|p|j|p|j|p|j|p|jd dS )z]Set the request context. If the value is not set,
    the current context value will be used.)rR   rS   rT   r"   rU   N)	r_   r]   r^   rP   rR   rS   rT   r"   rU   )rR   rS   rT   r"   rU   current_request_contextr%   r%   r&   _set_request_context   s   
rc   c                   C   s   t t  dS )zUnset the request context.N)r]   r^   rP   r%   r%   r%   r&   _unset_request_context   s   rd   request_contextsc                 C   s   t |  dS )z5Add the request context to the batch request context.N)r`   r^   )re   r%   r%   r&   _set_batch_request_context  s   rf   _requests_pending_assignmentparent_request_idc                 C      | t v rt |  S i S r    rg   rh   r%   r%   r&    _get_requests_pending_assignment     rl   response_idc                 C      |t |  |< d S r    rj   )rh   rn   taskr%   r%   r&   _add_request_pending_assignment"  s   rq   c                 C   4   |t |  v rt |  |= tt |  dkrt | = d S d S Nr   )rg   lenrh   rn   r%   r%   r&   "_remove_request_pending_assignment(  
   

rv   _in_flight_requestsc                 C   ri   r    rx   rk   r%   r%   r&   _get_in_flight_requests?  rm   rz   c                 C   ro   r    ry   )rh   rn   replica_resultr%   r%   r&   _add_in_flight_requestF  s   r|   c                 C   rr   rs   )rx   rt   ru   r%   r%   r&   _remove_in_flight_requestJ  rw   r}   )FT)T)rQ   rQ   rQ   rQ   rQ   )Fr0   asynciocontextvarsloggingcollectionsr   dataclassesr   typingr   r   r   r   r7   ray.exceptionsr   ray.serve._private.clientr	   ray.serve._private.commonr
   r   ray.serve._private.configr   ray.serve._private.constantsr   r   r   !ray.serve._private.replica_resultr   ray.serve.exceptionsr   ray.serve.grpc_utilr   ray.serve.schemar   ray.util.annotationsr   	getLoggerr<   r   r1   r   r   rZ   r@   r>   rB   r2   r4   rD   r?   rP   
ContextVarr]   r`   r_   ra   rc   rd   rf   r[   rg   Taskrl   rq   rv   rx   rz   r|   r}   r%   r%   r%   r&   <module>   s    
!
"
6
" 