o
    پi(#                     @   s   d dl Z d dlZd dlZd dl mZmZ d dlmZmZmZ d dl	Z	d dl
mZ er0d dlmZ edZeeZeddG d	d
 d
ZG dd deZG dd dZG dd dZdededed ddfddZdS )    N)	dataclassfield)TYPE_CHECKING
NamedTupleOptional)get_bool_env_var)SchedulerMetricsCollector SGLANG_PREFILL_DELAYER_DEBUG_LOGT)frozenc                   @   s8   e Zd ZU dZeed< eejdZ	e
ed< d	ddZdS )
_Stater   delayed_count)default_factory
start_timereturnc                 C   s   t j| | jd dS )N   )r   )dataclassesreplacer   self r   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/managers/prefill_delayer.pybump_delayed_count   s   z_State.bump_delayed_countN)r   r   )__name__
__module____qualname__r   int__annotations__r   timeperf_counterr   floatr   r   r   r   r   r      s   
 r   c                   @   sB   e Zd ZU ee ed< eed< eed< eed< eed< eed< dS )_NegotiateOutput
next_stateinput_estimationoutput_allowoutput_reasonnum_prefillablenum_token_watermark_force_allowN)	r   r   r   r   r   r   strboolr   r   r   r   r   r       s   
 r    c                   @   s|   e Zd Z	ddedededee ded f
dd	Zd
ededefddZ	dee
 d
ededefddZd
edefddZdS )PrefillDelayerNdp_sizeattn_tp_sizemax_delay_passestoken_usage_low_watermarkmetrics_collectorr   c                 C   s   || _ || _td| j  d| j  tj||dftjdd| _|| _|| _	d | _
|js0J d|jdks9J d|jr@J d	d S )
Nz1PrefillDelayer initialized with max_delay_passes=z token_usage_low_watermark=   cpu)dtypedevicez;To use PrefillDelayer, enable_dp_attention must be enabled.nullz8To use PrefillDelayer, disaggregation_mode must be null.z>To use PrefillDelayer, disable_overlap_schedule must be False.)_max_delay_passes_token_usage_low_watermarkloggerinfotorchemptyint64_global_info_buffer
_cpu_group_metrics_collector_curr_stateenable_dp_attentiondisaggregation_modedisable_overlap_schedule)r   r*   r+   	cpu_groupserver_argsr,   r-   r.   r   r   r   __init__&   s8   
zPrefillDelayer.__init__local_prefillabletoken_usager   c                 C   s   | j | j||d}|j| _|S )N)
prev_staterE   rF   )$_negotiate_should_allow_prefill_purer>   r!   )r   rE   rF   outr   r   r   _negotiate_should_allow_prefillM   s   z.PrefillDelayer._negotiate_should_allow_prefillrG   c                 C   sV  |o| j  }d uo||k }| j||d\}}|  dkr!d}n|  dkr,d}nd}|  dk}	t||  |  d}
|dkr\|d u}tdd d|rUdnd	d
|
S |dkrktdd ddd
|
S |dkr|	r|tdd ddd
|
S |r|jnd}|| j	d k r|pt
 }| }td|ddd
|
S tdd ddd
|
S t)N)rE   !local_token_watermark_force_allowr   allnonemixed)r"   r%   r&   Twait_successno_wait)r!   r#   r$    token_watermarkr   Fdelaywait_timeoutr   )r5   _gather_infominitemmaxdictsumr    r   r4   r   r   NotImplementedError)r   rG   rE   rF   xrK   global_prefillable"global_token_watermark_force_allowprefillable_status)global_exists_token_watermark_force_allow
debug_infoexist_previous_waitprev_delayed_countr!   r   r   r   rH   Y   s   




z3PrefillDelayer._negotiate_should_allow_prefill_purerK   c                 C   sp   t jt|t|gdt jd}t jj| j || jd | jd d dd d f }|d d df |d d df fS )Nr0   )r2   r1   )groupr   r   )	r8   tensorr   r:   distributedall_gather_into_tensorr;   flattenr<   )r   rE   rK   
local_infotp0_infor   r   r   rU      s    zPrefillDelayer._gather_infoN)r   r   r   r   r   r   rD   r(   r    rJ   r   rH   rU   r   r   r   r   r)   %   sD    	
'

Qr)   c                   @   sP   e Zd ZdedefddZedefddZdefd	d
Z	dedefddZ
dS ) PrefillDelayerSinglePassExecutorprefill_delayerrF   c                 C   s   || _ || _d | _d S rk   )_prefill_delayer_token_usage_result)r   rm   rF   r   r   r   rD      s   
z)PrefillDelayerSinglePassExecutor.__init__r   c                 C   s
   | j d uS rk   )rp   r   r   r   r   _called   s   
z(PrefillDelayerSinglePassExecutor._calledactual_prefillc                C   s*   | j s	| jdd t|| j| jjd d S )NF)rE   )actual_executionoutputr.   )rq   negotiate_should_allow_prefill_record_single_pass_resultrp   rn   r=   )r   rr   r   r   r   finalize   s   
z)PrefillDelayerSinglePassExecutor.finalizerE   c                 C   s"   | j s| jj|| jd| _| jjS )N)rE   rF   )rq   rn   rJ   ro   rp   r#   )r   rE   r   r   r   ru      s   z?PrefillDelayerSinglePassExecutor.negotiate_should_allow_prefillN)r   r   r   r)   r   rD   propertyr(   rq   rw   ru   r   r   r   r   rl      s    
rl   rs   rt   r.   r   r   c              	   C   s   t r9|jr|jdkrtd|j d|  d n!|jr2|jdkr2td|j d|j d|  d n|jdv s9J |d urc|j }d urOt	 |j
 }|j}nd	 }}|j|||j|j|j| d
 d S d S )NrT   z@PrefillDelayer timeout thus not forbid prefill (num_prefillable=z, actual_execution=)rR   zJPrefillDelayer force allow prefill due to low watermark. (num_prefillable=z", num_token_watermark_force_allow=>   rQ   rS   rP   rO   r   )forward_passeswait_secondsr"   r#   r$   rs   )
_DEBUG_LOGr#   r$   r6   r7   r%   r&   r!   r   r   r   r   observe_prefill_delayer_outcomer"   )rs   rt   r.   sr{   rz   r   r   r   rv      sD   
rv   )r   loggingr   r   r   typingr   r   r   r8   sglang.srt.utilsr   sglang.srt.metrics.collectorr   r|   	getLoggerr   r6   r   r    r)   rl   r(   rv   r   r   r   r   <module>   s4    
	 