o
    `۷ii                     @   s  d dl Z d dlZd dlmZmZmZmZmZmZm	Z	m
Z
 d dlmZ d dlmZ d dlmZ d dlmZ d dlmZmZmZmZmZmZmZ d dlmZ d d	lmZ d d
lm Z m!Z!m"Z"m#Z#m$Z$m%Z%m&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z,m-Z-m.Z.m/Z/m0Z0 d dl1m,Z2m3Z4m5Z5m6Z7m8Z9m:Z;m0Z< d dl=m>Z> de5de?fddZ@dedefddZAG dd deZ3de
eBe(f dee
ee,e(f  fddZCG dd dZ:de
deDe/f d e
deEe.f de.fd!d"ZFdS )#    N)AnyCallableDictListOptionalSetTupleUnion)FieldDescriptor)Message)cloudpickle)ray_option_utils)	BaseModelFieldNonNegativeFloatNonNegativeIntPositiveFloatPositiveInt	validator)pickle_dumps)resources_from_ray_options)DEFAULT_CONSTRUCTOR_RETRY_COUNT#DEFAULT_GRACEFUL_SHUTDOWN_TIMEOUT_S%DEFAULT_GRACEFUL_SHUTDOWN_WAIT_LOOP_SDEFAULT_HEALTH_CHECK_PERIOD_SDEFAULT_HEALTH_CHECK_TIMEOUT_SDEFAULT_MAX_ONGOING_REQUESTSMAX_REPLICAS_PER_NODE_MAX_VALUE)DEFAULTDeploymentOptionUpdateType)AggregationFunctionAutoscalingConfigDeploymentModeHTTPOptionsProxyLocationRequestRouterConfig)r!   DeploymentConfigDeploymentLanguageEncodingTypeLoggingConfigReplicaConfigr%   )validate_placement_groupdeployment_languageis_cross_languagec                 C   s(   | t jkr	|s	dS | t jkr|rdS dS )zGFrom Serve client API's perspective, decide whether pickling is needed.TF)r'   PYTHONJAVA)r,   r-    r0   O/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/serve/_private/config.py_needs_pickle3   s
   r2   protoreturnc                 C   s   i }|   D ]6\}}|jtjkr)|jtjkr!dd |D ||j< qt|||j< q|jtjkr7t|||j< q|||j< q| j	j
D ]}|j|vrW|jtjkrW|jsW|j||j< qA|S )zRecursively convert a protobuf into a Python dictionary.

    This is an alternative to protobuf's `MessageToDict`. Unlike
    `MessageToDict`, this function doesn't add an extra base64
    encoding to bytes when constructing a json response.
    c                 S   s   g | ]}t |qS r0   )_proto_to_dict).0vr0   r0   r1   
<listcomp>P       z"_proto_to_dict.<locals>.<listcomp>)
ListFieldslabelr
   LABEL_REPEATEDtypeTYPE_MESSAGEnamelistr5   
DESCRIPTORfieldscontaining_oneofdefault_value)r3   datafieldvaluer0   r0   r1   r5   @   s&   
r5   c                   @   s  e Zd ZU dZedejdZee	 e
d< eeejdZee
d< edejdZee
d< edejdZee
d	< eeejdZee
d
< eeejdZee
d< eeejdZee
d< eeejdZee
d< edejdZee e
d< ee ejdZ!e e
d< dZ"e#e
d< e$j%Z&ee
d< edej'dZ(ee) e
d< edejdZ*ee+ e
d< ee,ejdZ-ee
d< e. Z/e0e) e
d< G dd dZ1e2d	dddd Z3e2ddddd Z4e2dddd d! Z5d"d# Z6d$d% Z7d&d' Z8d(d) Z9e:d*e;fd+d,Z<e:d-e=fd.d/Z>e:d0d1 Z?dS )2r&   a  Internal datastructure wrapping config options for a deployment.

    Args:
        num_replicas: The number of processes to start up that
            handles requests to this deployment. Defaults to 1.
        max_ongoing_requests: The maximum number of queries
            that is sent to a replica of this deployment without receiving
            a response. Defaults to 5.
        max_queued_requests: Maximum number of requests to this deployment that will be
            queued at each *caller* (proxy or DeploymentHandle). Once this limit is
            reached, subsequent requests will raise a BackPressureError (for handles) or
            return an HTTP 503 status code (for HTTP requests). Defaults to -1 (no
            limit).
        user_config: Arguments to pass to the reconfigure
            method of the deployment. The reconfigure method is called if
            user_config is not None. Must be JSON-serializable.
        graceful_shutdown_wait_loop_s: Duration
            that deployment replicas wait until there is no more work to
            be done before shutting down.
        graceful_shutdown_timeout_s: Controller waits for this duration
            to forcefully kill the replica for shutdown.
        health_check_period_s: Frequency at which the controller health
            checks replicas.
        health_check_timeout_s: Timeout that the controller waits for a
            response from the replica's health check before marking it
            unhealthy.
        autoscaling_config: Autoscaling configuration.
        logging_config: Configuration for deployment logs.
        user_configured_option_names: The names of options manually
            configured by the user.
        request_router_config: Configuration for deployment request router.
        max_constructor_retry_count: Maximum number of times to retry the
            deployment constructor. Defaults to 20.
       )defaultupdate_typenum_replicasmax_ongoing_requestsmax_queued_requestsNuser_configgraceful_shutdown_timeout_sgraceful_shutdown_wait_loop_shealth_check_period_shealth_check_timeout_sautoscaling_config)default_factoryrJ   request_router_configFr-   r,   versionlogging_configmax_constructor_retry_countuser_configured_option_namesc                   @   s   e Zd ZdZdZdS )zDeploymentConfig.ConfigTN)__name__
__module____qualname__validate_assignmentarbitrary_types_allowedr0   r0   r0   r1   Config   s    r`   T)alwaysc              
   C   sX   t |tr|S |d ur*zt| W |S  ty) } z
tdt| dd }~ww |S )Nz&user_config is not JSON-serializable: .)
isinstancebytesjsondumps	TypeError
ValueErrorstr)clsr7   er0   r0   r1   user_config_json_serializable   s   
z.DeploymentConfig.user_config_json_serializablec                 C   sL   |d u r|S t |tstdt| dddlm} |di | }|S )NzGot invalid type 'z,' for logging_config. Expected a dictionary.r   )r)   r0   )rc   dictrg   r=   ray.serve.schemar)   )rj   r7   r)   r0   r0   r1   logging_config_valid   s   
z%DeploymentConfig.logging_config_validc                 C   s.   t |ts	td|dk r|dkrtd|S )Nz'max_queued_requests must be an integer.rH   rM   z@max_queued_requests must be -1 (no limit) or a positive integer.)rc   intrg   rh   )rj   r7   r0   r0   r1   validate_max_queued_requests   s   
z-DeploymentConfig.validate_max_queued_requestsc                 C   s   t | j| jS N)r2   r,   r-   selfr0   r0   r1   needs_pickle   s   zDeploymentConfig.needs_picklec                 C   sL  |   }|dd ur|  rt|d |d< |dr3| jjj|d d d< tdi |d |d< |drt|d d}|d ura|sLd|d d< n|  rZt||d d< nt	d| | j
j|d d	< tdi |d |d< |d
rd|d
 v rt|d
 d |d
 d< tdi |d
 |d
< t|d |d< tdi |S )NrO   rT   policy_serialized_policy_defrV   request_router_kwargs    zRNon-empty request_router_kwargs not supportedfor cross-language deployments. Got: _serialized_request_router_clsrX   encodingrZ   r0   )rm   getru   r   rf   rT   rv   rw   AutoscalingConfigProtorh   rV   rz   RequestRouterConfigProtoEncodingTypeProtoValueLoggingConfigProtor@   DeploymentConfigProto)rt   rE   router_kwargsr0   r0   r1   to_proto   sX   






zDeploymentConfig.to_protoc                 C      |    S rr   r   SerializeToStringrs   r0   r0   r1   to_proto_bytes,     zDeploymentConfig.to_proto_bytesc                 C   s   |   S rr   )rm   rs   r0   r0   r1   to_dict/  s   zDeploymentConfig.to_dictr3   c                 C   s"  t |}d|v r|d ntj}d|v r|d nd}t||}d|v r=|d dkr9|r3t|j|d< n
|j|d< nd |d< d|v ryd|d v rn|d d }|dkrh|r_t|jj|d d< n|jj|d d< ni |d d< t	di |d |d< d|v r|d 
d	sd |d d	< |d 
d
sd |d d
< |d 
dsd |d d< |d 
dsd |d d< |d 
dsd |d d< |d 
dstj|d d< tdi |d |d< d|v r|d dkrd |d< d|v rt|d |d< d|v r
d|d v r
t|d d |d d< | di |S )Nr,   r-   FrO   ry   rV   rx   rT   upscale_smoothing_factordownscale_smoothing_factorupscaling_factordownscaling_factortarget_ongoing_requestsaggregation_functionrW    rZ   rX   r{   r0   )r5   r'   r.   r2   r   loadsrO   rV   rx   r%   r|   r    MEANr!   setr   Name)rj   r3   rE   r,   r-   ru   rx   r0   r0   r1   
from_proto3  s   



zDeploymentConfig.from_protoproto_bytesc                 C   s   t |}| |S rr   )r   
FromStringr   )rj   r   r3   r0   r0   r1   from_proto_bytes|  s   

z!DeploymentConfig.from_proto_bytesc              	   K   s   |  }t |  }| D ]\}}||vr'td| d| dt| dqdd | D }| D ]
\}}||| q5|S )zCreates a default DeploymentConfig and overrides it with kwargs.

        Ignores any kwargs set to DEFAULT.VALUE.

        Raises:
            TypeError: when a keyword that's not an argument to the class is
                passed in.
        z&Got invalid Deployment config option "z" (with value zO) as keyword argument. All Deployment config options must come from this list: rb   c                 S   s    i | ]\}}|t jkr||qS r0   )r   VALUE)r6   keyvalr0   r0   r1   
<dictcomp>  s     z1DeploymentConfig.from_default.<locals>.<dictcomp>)r   rm   keysitemsrg   r@   __setattr__)rj   kwargsconfigvalid_config_optionsr   r   r0   r0   r1   from_default  s    zDeploymentConfig.from_default)@r[   r\   r]   __doc__r   r   LightWeightrK   r   r   __annotations__r   NeedsActorReconfigurerL   r   rN   rp   rO   r   r   NeedsReconfigurerP   r   r   rQ   r   rR   r   r   rS   rT   r!   r%   rV   r-   boolr'   r.   r,   HeavyWeightrW   ri   rX   rm   r   rY   r   rZ   r   r`   r   rl   ro   rq   ru   r   r   r   classmethodr   r   rd   r   r   r0   r0   r0   r1   r&   f   s   
 #





0Hr&   rL   rT   c                 C   sh   |t jdfv rt }| |fS t jdd}t|tr|n|jdd}|| tdi |}| |fS )aF  Return modified `max_ongoing_requests` and `autoscaling_config`
    for when num_replicas="auto".

    If `autoscaling_config` is unspecified, returns the modified value
    AutoscalingConfig.default().
    If it is specified, the specified fields in `autoscaling_config`
    override that of AutoscalingConfig.default().
    NTexclude_unsetr0   )r   r   r!   rI   rm   rc   update)rL   rT   default_configr0   r0   r1   handle_num_replicas_auto  s   

r   c                   @   sb  e Zd ZdZ						d5dededededed	eeeee	f   d
ee deeeeef   deeeee
f   dee defddZdd Zdd Z					d6ded	eeeee	f   d
ee deeeeef   deeeee
f   dee fddZe									d7deeef deee
  deee
e
f  dee d	eeeee	f   d
ee deeeeef   deeeee
f   dee dee fddZdd Zd8dd Zd8d!d"Zedeeef fd#d$Zedeeee
 ef  fd%d&Zedeee
  fd'd(Zed9d)edefd*d+Zed9d,edefd-d.Z d/d0 Z!d1d2 Z"d3d4 Z#dS ):r*   a>  Internal datastructure wrapping config options for a deployment's replicas.

    Provides five main properties (see property docstrings for more info):
        deployment_def: the code, or a reference to the code, that this
            replica should run.
        init_args: the deployment_def's init_args.
        init_kwargs: the deployment_def's init_kwargs.
        ray_actor_options: the Ray actor options to pass into the replica's
            actor.
        resource_dict: contains info on this replica's actor's resource needs.

    Offers a serialized equivalent (e.g. serialized_deployment_def) for
    deployment_def, init_args, and init_kwargs. Deserializes these properties
    when they're first accessed, if they were not passed in directly through
    create().

    Use the classmethod create() to make a ReplicaConfig with the deserialized
    properties.

    Note: overwriting or setting any property after the ReplicaConfig has been
    constructed is currently undefined behavior. The config's fields should not
    be modified externally after it is created.
    NTdeployment_def_nameserialized_deployment_defserialized_init_argsserialized_init_kwargsray_actor_optionsplacement_group_bundlesplacement_group_strategy%placement_group_bundle_label_selector!placement_group_fallback_strategymax_replicas_per_noderu   c                 C   st   || _ || _|| _|| _d| _d| _d| _|| _|| _|| _	|| _
|	| _|
| _|   |   t| j| _|| _dS )zConstruct a ReplicaConfig with serialized properties.

        All parameters are required. See classmethod create() for defaults.
        N)r   r   r   r   _deployment_def
_init_args_init_kwargsr   r   r   r   r   r    _normalize_bundle_label_selector	_validater   resource_dictru   )rt   r   r   r   r   r   r   r   r   r   r   ru   r0   r0   r1   __init__  s$   
zReplicaConfig.__init__c                    sd   | j r*| jr,t| jdkr.t| j dkr0| jd   fddtt| j D | _dS dS dS dS dS )zsIf a single selector is provided for multiple bundles, it is broadcasted
        uniformly to all bundles.
        rH   r   c                    s   g | ]}   qS r0   )copy)r6   _single_selectorr0   r1   r8     r9   zBReplicaConfig._normalize_bundle_label_selector.<locals>.<listcomp>N)r   r   lenrangers   r0   r   r1   r     s   

z.ReplicaConfig._normalize_bundle_label_selectorc                 C   s<   |    |   |   | jd ur| jd urtdd S d S )NzVSetting max_replicas_per_node is not allowed when placement_group_bundles is provided.)_validate_ray_actor_options!_validate_placement_group_options_validate_max_replicas_per_noder   r   rh   rs   r0   r0   r1   r     s   

zReplicaConfig._validatec                 C   sD   || _ || _|| _|| _|| _|| _|   |   t| j | _	d S rr   )
r   r   r   r   r   r   r   r   r   r   )rt   r   r   r   r   r   r   r0   r0   r1   r   +  s   	zReplicaConfig.updatedeployment_def	init_argsinit_kwargsc                 C   s&  t |st|tstd|du st|ttfstd|du s)t|ts)tdt|r:|r4t	d|r:t	dt|t
tfsKtdt| d|du rQd	}|du rWi }|du r]i }|
du rlt|tri|}
n|j}
| |
t|d
t| t|dt|d||||||	d
}||_||_||_|S )z4Create a ReplicaConfig from deserialized parameters.z8@serve.deployment must be called on a class or function.Nzinit_args must be a tuple.zinit_kwargs must be a dict.z1init_args not supported for function deployments.z3init_kwargs not supported for function deployments.Got invalid type "zQ" for deployment_def. Expected deployment_def to be a class, function, or string.r0   z#Could not serialize the deployment z,Could not serialize the deployment init argsz.Could not serialize the deployment init kwargs)
r   r   r   r   r   r   r   r   r   r   )callablerc   ri   rg   tupler@   rm   inspect
isfunctionrh   r   r=   r[   r   reprr   r   r   )rj   r   r   r   r   r   r   r   r   r   r   r   r0   r0   r1   createD  s`   

zReplicaConfig.createc                 C   s   t | jtstdt| j dh d}| jD ]}||vr(td| d| qtj| jdd | jdd u r@d	| jd< d S d S )
Nr   z/" for ray_actor_options. Expected a dictionary.>   memorynum_cpusnum_gpus	resourcesruntime_envlabel_selectoraccelerator_typefallback_strategyzSpecifying 'z8' in ray_actor_options is not allowed. Allowed options: T)
in_optionsr   rH   )	rc   r   rm   rg   r=   rh   r   validate_actor_optionsr|   )rt   allowed_ray_actor_optionsoptionr0   r0   r1   r     s"   
z)ReplicaConfig._validate_ray_actor_optionsr4   c                 C   sf   | j d u rd S t| j tstdt| j  dt d| j dk s%| j tkr1td| j  dt dd S )NzGet invalid type 'zM' for max_replicas_per_node. Expected None or an integer in the range of [1, z].rH   zInvalid max_replicas_per_node z:. Valid values are None or an integer in the range of [1, )r   rc   rp   rg   r=   r   rh   rs   r0   r0   r1   r     s    



z-ReplicaConfig._validate_max_replicas_per_nodec                 C   s  | j d ur| jd u rtd| jd urI| jd u rtdt| jts-tdt| j dt| jD ]\}}t|t	sHtd| dt| dq2| j
d urW| jd u rWtd| jd urt| j| j pcdd	| j
d
 d}| jd }|dd}| jdd}||k rt| d| d| d|dd}| jdd}||k rt| d| d| d| jdi }	|	 D ]#\}
}||
d}||k rt| d|
 d| d| d|
 d
qd S d S )Nz[If `placement_group_strategy` is provided, `placement_group_bundles` must also be provided.zdIf `placement_group_fallback_strategy` is provided, `placement_group_bundles` must also be provided.zGplacement_group_fallback_strategy must be a list of dictionaries. Got: rb   z1placement_group_fallback_strategy entry at index z must be a dictionary. Got: zhIf `placement_group_bundle_label_selector` is provided, `placement_group_bundles` must also be provided.PACKdetached)bundlesstrategylifetimebundle_label_selectorzWhen using `placement_group_bundles`, the replica actor will be placed in the first bundle, so the resource requirements for the actor must be a subset of the first bundle.r   CPUr   z `num_cpus` for the actor is z, but the bundle only has z `CPU` specified.GPUr   z `num_gpus` for the actor is z `GPU` specified.r   z `z` requirement for the actor is z` specified.)r   r   rh   r   rc   r@   rg   r=   	enumeraterm   r   r+   r|   r   r   )rt   ir   resource_error_prefixfirst_bundle
bundle_cpureplica_actor_num_cpus
bundle_gpureplica_actor_num_gpusreplica_actor_resourcesactor_resourceactor_valuebundle_valuer0   r0   r1   r     s   








%z/ReplicaConfig._validate_placement_group_optionsc                 C   s:   | j du r| jrt| j| _ | j S | jjdd| _ | j S )aD  The code, or a reference to the code, that this replica runs.

        For Python replicas, this can be one of the following:
            - Function (Callable)
            - Class (Callable)
            - Import path (str)

        For Java replicas, this can be one of the following:
            - Class path (str)
        Nzutf-8)r{   )r   ru   r   r   r   decoders   r0   r0   r1   r     s   
zReplicaConfig.deployment_defc                 C   s2   | j du r| jrt| j| _ | j S | j| _ | j S )zThe init_args for a Python class.

        This property is only meaningful if deployment_def is a Python class.
        Otherwise, it is None.
        N)r   ru   r   r   r   rs   r0   r0   r1   r   )  s   
zReplicaConfig.init_argsc                 C   s   | j du rt| j| _ | j S )zThe init_kwargs for a Python class.

        This property is only meaningful if deployment_def is a Python class.
        Otherwise, it is None.
        N)r   r   r   r   rs   r0   r0   r1   r   8  s   
zReplicaConfig.init_kwargsr3   c                 C   s   t |j|j|jdkr|jnd |jdkr|jnd t|j|jr%t|jnd |j	dkr.|j	nd |j
r8t|j
nd |jrBt|jnd |jrL|j|dS d |dS )Nry   r   )r   r   r   r   r   r   r   r   r   r   ru   )r*   r   r   r   r   re   r   r   r   r   r   r   r   )rj   r3   ru   r0   r0   r1   r   E  s0   


zReplicaConfig.from_protor   c                 C   s   t |}| ||S rr   )ReplicaConfigProtor   r   )rj   r   ru   r3   r0   r0   r1   r   i  s   
zReplicaConfig.from_proto_bytesc                 C   s   | j d urt| j nd}| jd urt| jnd}| jd ur%t| jnd}| jd ur/| jnd}t| j| j| j	| j
t| j|| j|||d
S )Nr   r   )
r   r   r   r   r   r   r   r   r   r   )r   re   rf   r   r   r   r   r   r   r   r   r   r   )rt   r   r   r   r   r0   r0   r1   r   n  s4   



zReplicaConfig.to_protoc                 C   r   rr   r   rs   r0   r0   r1   r     r   zReplicaConfig.to_proto_bytesc                 C   s"   | j | j| j| j| j| j| jdS )Nr   r   r   r   r   r   r   r   rs   r0   r0   r1   r     s   zReplicaConfig.to_dict)NNNNNT)NNNNN)	NNNNNNNNN)r4   N)T)$r[   r\   r]   r   ri   rd   r   r   r   floatr   rp   r   r   r   r   rm   r   r   r	   r   r   r   r   r   r   propertyr   r   r   r   r   r   r   r   r   r0   r0   r0   r1   r*     s    	

4


	
K
!
O #$r*   proxy_locationhttp_optionsc                 C   s   |du r
d}t  }n/t|trd|v }t di |}nt|t r.d}t di |jdd}ntdt|j d| du rE|sCtj|_|S t	
| |_|S )	a  Prepare `HTTPOptions` with a resolved `location` based on `proxy_location` and `http_options`.

    Precedence:
    - If `proxy_location` is provided, it overrides any `location` in `http_options`.
    - Else if `http_options` specifies a `location` explicitly (HTTPOptions(...) or dict with 'location'), keep it.
    - Else (no `proxy_location` and no explicit `location`) set `location` to `DeploymentMode.EveryNode`.
      A bare `HTTPOptions()` counts as an explicit default (`HeadOnly`).

    Args:
        proxy_location: Optional ProxyLocation (or its string representation).
        http_options: Optional HTTPOptions instance or dict. If None, a new HTTPOptions() is created.

    Returns:
        HTTPOptions: New instance with resolved location.

    Note:
        1. Default ProxyLocation (when unspecified) resolves to DeploymentMode.EveryNode.
        2. Default HTTPOptions() location is DeploymentMode.HeadOnly.
        3. `HTTPOptions` is used in `imperative` mode (Python API) cluster set-up.
            `Declarative` mode (CLI / REST) uses `HTTPOptionsSchema`.

    Raises:
        ValueError: If http_options is not None, dict, or HTTPOptions.
    NFlocationTr   z#Unexpected type for http_options: ``r0   )r#   rc   rm   rh   r=   r[   r"   	EveryNoder   r$   _to_deployment_mode)r   r   location_set_explicitlyr0   r0   r1   prepare_imperative_http_options  s$   

r  )Gr   re   typingr   r   r   r   r   r   r   r	   google.protobuf.descriptorr
   google.protobuf.messager   rayr   ray._commonr   ray._common.pydantic_compatr   r   r   r   r   r   r   ray._common.serializationr   ray._common.utilsr   ray.serve._private.constantsr   r   r   r   r   r   r   ray.serve._private.utilsr   r   ray.serve.configr    r!   r"   r#   r$   r%   ray.serve.generated.serve_pb2r}   r&   r   r'   r(   r   r)   r   r*   r   r~   ray.util.placement_groupr+   r   r2   r5   rp   r   ri   rm   r  r0   r0   r0   r1   <module>   sH    ($	$	 $	&  =

    d