o
    `۷i0                     @   s  d dl Z d dlZd dlZd dlmZmZmZmZmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZmZ d dlmZ eeZde	eef ded	ed
efddZdedededeeef d
eeeeef f f
ddZdededed
efddZde	eef dedeeef d
eeeeef f fddZdeeef deeef de	eef ded
eeeeef f f
ddZdeeef deeef d
eeef fddZded
ee fdd Z d!eegee	eef eeef f f d
eegeeeeef f f fd"d#Z!d!eeeef geeee	eef f eeeef  f f d
eeeef geeeef eeef f f fd$d%Z"ded
eeeeef f fd&d'Z#ed(d)ded
ee	eef eeef f fd*d+Z$e$Z%dS ),    N)AnyCallableDictOptionalTupleUnion)DeploymentID)CONTROL_LOOP_INTERVAL_S'SERVE_AUTOSCALING_DECISION_COUNTERS_KEYSERVE_LOGGER_NAME)AutoscalingConfigAutoscalingContext)	PublicAPIdesired_num_replicascurrent_num_replicasautoscaling_configreturnc                 C   sX   | | }|dkr|  n| }t|||  }tt| |k r*||kr*|d8 }|S )zApply scaling factors to the desired number of replicas.
    Returns the scaled number of replicas depending on the scaling factor.
    The computation uses the difference between desired and current to scale.

    r      )get_upscaling_factorget_downscaling_factormathceilfloat)r   r   r   replicas_deltascaling_factorscaled_num_replicas r   R/home/ubuntu/vllm_env/lib/python3.10/site-packages/ray/serve/autoscaling_policy.py_apply_scaling_factors   s   

r   curr_target_num_replicasconfigpolicy_statec                 C   s   |}| td}| |kr$|dk rd}|d7 }|t|jt kr#d}| }n9| |k r[|dkr.d}|d8 }|dk}|rE|jdurA|j}n|j}n|j}td| } |t|t  k rZd}| }nd}||t< ||fS )z4Apply delay logic to the desired number of replicas.r   r   N)getr
   intupscale_delay_sr	   downscale_to_zero_delay_sdownscale_delay_smax)r   r   r    r!   decision_num_replicasdecision_counteris_scaling_to_zerodelay_sr   r   r   _apply_delay_logic.   s8   

r,   num_replicascapacity_adjusted_min_replicascapacity_adjusted_max_replicasc                 C   s   t |t|| S )zAClip replica count to be within capacity-adjusted min/max bounds.)r'   min)r-   r.   r/   r   r   r   _apply_boundsi   s   r1   ctxc                 C   s>   t | |j|j} t| |j|j}t||j|j|\}}||fS )z?Apply the default parameters to the desired number of replicas.)r   r   r    r1   r.   r/   r,   target_num_replicas)r   r2   r!   bounded_num_replicasfinal_num_replicasupdated_stater   r   r   _apply_default_paramsu   s   
r7   user_policy_statec                 C   s6   t | t di}t|||\}}|r|| ||fS )Nr   )r
   r"   r7   update)r!   r8   r   r2   internal_policy_stater5   r6   r   r   r   %_apply_default_params_and_merge_state   s   	
r;   c                 C   s   t | t di}|| |S )z~Merge user state with previous policy state, preserving internal keys.

    This mutates and returns `user_policy_state`.
    r   )r
   r"   r9   )r!   r8   r:   r   r   r   %_merge_user_state_with_internal_state   s   

r<   c                 C   s:   | j dkr| jdkrttd| j  | jS | jS dS )zm
    Returns the desired number of replicas if the cold start fast path applies, otherwise returns None.
    r   r   N)r   total_num_requestsr'   r   r   r    r   r3   )r2   r   r   r   !_get_cold_start_scale_up_replicas   s   

r>   policy_funcc              	      s4   t  dtdtttttf f f fdd}|S )z
    Wraps a custom policy function to automatically apply:
    - upscaling_factor / downscaling_factor
    - min_replicas / max_replicas bounds
    - upscale_delay_s / downscale_delay_s / downscale_to_zero_delay_s
    r2   r   c                    sJ   t | }|d ur|| jfS | j } | \}}t|||| \}}||fS N)r>   r!   copyr;   )r2   cold_start_replicasr!   r   updated_custom_policy_stater5   final_stater?   r   r   wrapped_policy   s   

z1_apply_autoscaling_config.<locals>.wrapped_policy)	functoolswrapsr   r   r#   r   strr   r?   rF   r   rE   r   _apply_autoscaling_config   s   (rK   c              	      sD   t  dtttf dttttf tttf f f fdd}|S )z
    Wraps an application-level custom policy function to automatically apply per-deployment:
    - upscaling_factor / downscaling_factor
    - min_replicas / max_replicas bounds
    - upscale_delay_s / downscale_delay_s / downscale_to_zero_delay_s
    contextsr   c                    s   i }|   D ]\}}|j ||< q | \}}|pi }i }i }|   D ]@\}}||vr3|| ||< q$||i }t|}	|	d urO|	||< t|| |||< q$t|| ||| |\}
}|
||< |||< q$||fS r@   )itemsr!   rA   r"   r>   r<   r;   )rL   state_per_deploymentdep_idr2   desired_num_replicas_dictrC   final_decisionsrD   "custom_policy_state_per_deploymentrB   r5   final_dep_staterE   r   r   rF      s>   
z;_apply_app_level_autoscaling_config.<locals>.wrapped_policy)rG   rH   r   r   r   r   r#   rJ   r   rE   r   #_apply_app_level_autoscaling_config   s   
,rT   c                 C   sB   | j }| j}|dkrtd| | }| j| }|| }|i fS )Nr   z!Number of replicas cannot be zero)r   r    
ValueErrorget_target_ongoing_requestsr=   )r2   num_running_replicasr    target_num_requestserror_ratior   r   r   r   !_core_replica_queue_length_policy'  s   
rZ   alpha)	stabilityc                 C   s"   t | }|dur|| jfS t| S )a  The default autoscaling policy based on basic thresholds for scaling.
    There is a minimum threshold for the average queue length in the cluster
    to scale up and a maximum threshold to scale down. Each period, a 'scale
    up' or 'scale down' decision is made. This decision must be made for a
    specified number of periods in a row before the number of replicas is
    actually scaled. See config options for more details.  Assumes
    `get_decision_num_replicas` is called once every CONTROL_LOOP_PERIOD_S
    seconds.
    N)r>   r!   rZ   )r2   rB   r   r   r   'replica_queue_length_autoscaling_policy4  s   
r]   )&rG   loggingr   typingr   r   r   r   r   r   ray.serve._private.commonr   ray.serve._private.constantsr	   r
   r   ray.serve.configr   r   ray.util.annotationsr   	getLoggerloggerr#   r   r   rI   r,   r1   r7   r;   r<   r>   rK   rT   rZ   r]   default_autoscaling_policyr   r   r   r   <module>   s     




;











 

C
