o
    پiv                     @  s  U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d dlmZmZmZmZmZmZmZmZ d dlZd dlZd dlZd d	lmZ d d
lmZ d dlm Z  d dl!m"Z" d dl#m$Z$m%Z% eryd dl&m'Z' e(e)Z*ed Z+eG dd dZ,G dd deZ-G dd de-Z.G dd de-Z/e. a0de1d< dd Z2dd Z3G dd deZ4G d d! d!e4Z5G d"d# d#e4Z6d^d(d)Z7G d*d+ d+e4Z8G d,d- d-e8Z9G d.d/ d/e6Z:G d0d1 d1e8Z;d_d8d9Z<d`d=d>Z=d?Z>G d@dA dAeZ?G dBdC dCe?Z@G dDdE dEZAG dFdG dGe@ZBG dHdI dIe@ZCdJdK ZDG dLdM dMZEG dNdO dOeEZFG dPdQ dQeEZGdadUdVZHdbdYdZZIdcd\d]ZJdS )d    )annotationsN)ABCdeque)contextmanager)	dataclass)Path)TYPE_CHECKINGAnyDictListLiteralOptionalTupleType)envs)ExpertDispatchCollector)ForwardBatch)
ServerArgs)Withableget_int_env_var)ExpertLocationMetadata)fileobjectc                   @  s   e Zd ZU ded< dd ZdS )ExpertDistributionMetricstorch.Tensoreplb_balancednessc                 C  s   | j jddd| _ d S )NcpuT)non_blocking)r   toself r"   W/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/eplb/expert_distribution.pycopy_to_cpu3   s   z%ExpertDistributionMetrics.copy_to_cpuN)__name__
__module____qualname____annotations__r$   r"   r"   r"   r#   r   /   s   
 r   c                   @  s   e Zd ZdZed-dd	Zed
d Zedd Zedd Z	ed.ddZ
d/ddZd0ddZd1ddZdd  Zd!d" Zd2d3d&d'Zed(d) Zd*d+ Zd,S )4ExpertDistributionRecorderz$Global expert distribution recordingserver_argsr   expert_location_metadatar   rankintc                 C  s0   | j d ur|d usJ d	 	 t| ||S t S )NzRExpertLocationMetadata is required for expert distribution recording. One possible)!expert_distribution_recorder_mode_ExpertDistributionRecorderReal_ExpertDistributionRecorderNoopr*   r+   r,   r"   r"   r#   init_new:   s   

z#ExpertDistributionRecorder.init_newc                 c      d V  d S Nr"   r!   	layer_idxr"   r"   r#   with_current_layerL      
z-ExpertDistributionRecorder.with_current_layerc                 c  r3   r4   r"   r!   
debug_namer"   r"   r#   with_debug_nameP   r8   z*ExpertDistributionRecorder.with_debug_namec                 c  r3   r4   r"   r    r"   r"   r#   disable_this_regionT   r8   z.ExpertDistributionRecorder.disable_this_regionforward_pass_idforward_batchr   c                 c  s    i V  d S r4   r"   )r!   r=   r>   r"   r"   r#   with_forward_passX   r8   z,ExpertDistributionRecorder.with_forward_passtopk_idsr   c                 C     d S r4   r"   r!   r@   r"   r"   r#   on_select_experts\      z,ExpertDistributionRecorder.on_select_expertslocal_physical_count_of_layer	List[int]c                 C  rA   r4   r"   r!   rE   num_tokens_per_ranknum_tokens_per_rdma_ranknum_tokens_per_expertr"   r"   r#   on_deepep_dispatch_normal_      z4ExpertDistributionRecorder.on_deepep_dispatch_normalc                 C  rA   r4   r"   r!   rE   r"   r"   r#   on_deepep_dispatch_low_latencyh      z9ExpertDistributionRecorder.on_deepep_dispatch_low_latencyc                 C     |    d S r4   _on_not_implementedr    r"   r"   r#   start_recordm      z'ExpertDistributionRecorder.start_recordc                 C  rP   r4   rQ   r    r"   r"   r#   stop_recordp   rT   z&ExpertDistributionRecorder.stop_recordr   output_mode_OutputModec                 C  rP   r4   rQ   r!   rV   r"   r"   r#   dump_records   rT   z&ExpertDistributionRecorder.dump_recordc                 C  s   dS )NFr"   r    r"   r"   r#   	recordingv   s   z$ExpertDistributionRecorder.recordingc                 C  s   t d)NzZPlease set ServerArgs.expert_distribution_recorder_mode to use ExpertDistributionRecorder.)	Exceptionr    r"   r"   r#   rR   z   s   z.ExpertDistributionRecorder._on_not_implementedNr*   r   r+   r   r,   r-   r=   r-   r>   r   r@   r   rE   rF   rE   r   r   rV   rW   )r%   r&   r'   __doc__staticmethodr2   r   r7   r;   r<   r?   rC   rK   rN   rS   rU   rY   propertyrZ   rR   r"   r"   r"   r#   r)   7   s*    





	
r)   c                   @  s   e Zd ZdS )r0   N)r%   r&   r'   r"   r"   r"   r#   r0      s    r0   c                   @  s   e Zd Zd6ddZd	d
 Zdd Zed7ddZedd Zd8ddZ	d9ddZ
d:ddZd;d d!Zd<d"d#Zd=d&d'Zd(d) Zd*d+ Zd,d- Zd>d?d1d2Zed3d4 Zd5S )@r/   r*   r   r+   r   r,   r-   c                   s~   | _  | _d| _d| _t | _t | _t | _t	 | _
 fdd| j
 D | _jr=td |   d S d S )NFc                   s   i | ]
}|t  qS r"   )_SinglePassGathererr2   ).0kr+   r,   r*   r"   r#   
<dictcomp>   s    z<_ExpertDistributionRecorderReal.__init__.<locals>.<dictcomp>zUExpertDistributionRecorder auto start record since enable_expert_distribution_metrics)_server_args_expert_location_metadata
_recording_disable_allr   _current_forward_pass_id_current_layer_idx_current_debug_name_Accumulatorr2   _accumulatorget_single_pass_gatherer_keys_single_pass_gatherers"enable_expert_distribution_metricsloggerinforS   r!   r*   r+   r,   r"   ri   r#   __init__   s&   z(_ExpertDistributionRecorderReal.__init__c                 C     | j |S r4   )rp   
with_valuer5   r"   r"   r#   r7      rT   z2_ExpertDistributionRecorderReal.with_current_layerc                 C  r{   r4   )rq   r|   r9   r"   r"   r#   r;      rT   z/_ExpertDistributionRecorderReal.with_debug_namer=   r>   r   c                 c  sh    i }| j |! | | z|V  W | || n| || w W d    d S 1 s-w   Y  d S r4   )ro   r|   _on_forward_pass_start_on_forward_pass_end)r!   r=   r>   outputsr"   r"   r#   r?      s   
"z1_ExpertDistributionRecorderReal.with_forward_passc                 c  s*    | j }d| _ z	dV  W || _ dS || _ w )z1Context manager to temporarily disable recording.TN)rn   )r!   previous_disable_allr"   r"   r#   r<      s   z3_ExpertDistributionRecorderReal.disable_this_regionc                 C  s4   | j sd S | j D ]\}}|  || q
d S r4   )rm   ru   itemsreseton_forward_pass_start)r!   r>   gatherer_keygathererr"   r"   r#   r}      s   z6_ExpertDistributionRecorderReal._on_forward_pass_startr   Dict[str, Any]c                 C  s<   | j sd S | j D ]\}}| }| j|||| q
d S r4   )rm   ru   r   collectrs   append)r!   r=   r   r   r   single_pass_datar"   r"   r#   r~      s   z4_ExpertDistributionRecorderReal._on_forward_pass_endr@   r   c                 C     | j d|d d S )NrC   )r@   _on_hookrB   r"   r"   r#   rC      s   z1_ExpertDistributionRecorderReal.on_select_expertsrE   rF   c                 C  s   | j d||||d d S )NrK   )rE   rH   rI   rJ   r   rG   r"   r"   r#   rK      s   
z9_ExpertDistributionRecorderReal.on_deepep_dispatch_normalc                 C  r   )NrN   )rE   r   rM   r"   r"   r#   rN      s   
z>_ExpertDistributionRecorderReal.on_deepep_dispatch_low_latency	hook_namestrc                 K  sV   | j rd S | jst  sd S | j| j| jj	 }t
||dd| jj	i| d S )Nr6   r"   )rn   rm   torchget_device_moduleis_current_stream_capturingru   rs   get_single_pass_gatherer_keyrq   valuegetattrrp   )r!   r   kwargsr   r"   r"   r#   r      s   
 z(_ExpertDistributionRecorderReal._on_hookc                 C  sN   t d | jjdu sJ d| jj| j D ]}|  q| j  dS )z'Reset the expert distribution recorder.z'Resetting ExpertDistributionRecorder...Nzself._current_layer_idx.value=)rw   rx   rp   r   ru   valuesr   rs   )r!   r   r"   r"   r#   _reset   s   

z&_ExpertDistributionRecorderReal._resetc                 C  s"   | j rtd |   d| _ dS )z(Start recording the expert distribution.zSGLang server is already recording expert ids. Did you forget to dump the expert ids recorded so far by sending requests to the `/stop_expert_distribution_record` and `/dump_expert_distribution_record` endpoints?TN)rm   rw   warningr   r    r"   r"   r#   rS      s   
z,_ExpertDistributionRecorderReal.start_recordc                 C  s   | j std d| _ dS )z'Stop recording the expert distribution.zSGLang server has not been recording expert ids. Did you forget to start recording by sending request to the `/start_expert_distribution_record` endpoint?FN)rm   rw   r   r    r"   r"   r#   rU     s
   
z+_ExpertDistributionRecorderReal.stop_recordr   rV   rW   c                 C  s   | j j|d}|   |S )zIDump the expert distribution record and reset the recorder after dumping.)rV   )rs   dumpr   r!   rV   outputr"   r"   r#   rY     s   z+_ExpertDistributionRecorderReal.dump_recordc                 C     | j S r4   )rm   r    r"   r"   r#   rZ     s   z)_ExpertDistributionRecorderReal.recordingNr\   r]   r>   r   )r=   r-   r   r   r^   r_   r`   )r   r   ra   rb   )r%   r&   r'   rz   r7   r;   r   r?   r<   r}   r~   rC   rK   rN   r   r   rS   rU   rY   re   rZ   r"   r"   r"   r#   r/      s(    
	

	

	



	r/   z$Optional[ExpertDistributionRecorder]$_global_expert_distribution_recorderc                   C     t S r4   r   r"   r"   r"   r#   'get_global_expert_distribution_recorder  rD   r   c                 C  s   | a d S r4   r   )r   r"   r"   r#   'set_global_expert_distribution_recorder"  s   r   c                   @  s^   e Zd Zed"d	d
Zd#ddZd$ddZd%ddZd&ddZd'ddZ	dd Z
d(dd Zd!S ))rf   r*   r   r+   r   r,   r-   return'_SinglePassGatherer'c                 C  s   | j dkrt| ||S | j dkr!| jdkr| jdkrt||S t| jdkr<| jdkr0t||S | jdkr:t||S tt||S )N	per_tokenstat_approxnonenormallow_latency)r.   _DetailSinglePassGatherermoe_a2a_backenddeepep_mode_DeepepNormalSinglePassGathererNotImplementedError _SelectExpertsSinglePassGatherer#_DeepepLowLatencySinglePassGathererr1   r"   r"   r#   r2   +  s$   









z_SinglePassGatherer.init_newc                 C  s   || _ || _d S r4   )rl   _rank)r!   r+   r,   r"   r"   r#   rz   J  s   
z_SinglePassGatherer.__init__r>   r   c                 C  rA   r4   r"   r!   r>   r"   r"   r#   r   N  rD   z)_SinglePassGatherer.on_forward_pass_startr6   r@   r   c                 C  rA   r4   r"   r!   r6   r@   r"   r"   r#   rC   Q  rD   z%_SinglePassGatherer.on_select_expertsrE   rF   c                 C  rA   r4   r"   r!   r6   rE   rH   rI   rJ   r"   r"   r#   rK   T  s   z-_SinglePassGatherer.on_deepep_dispatch_normalc                 C  rA   r4   r"   r!   r6   rE   r"   r"   r#   rN   ^  rO   z2_SinglePassGatherer.on_deepep_dispatch_low_latencyc                 C     t r4   r   r    r"   r"   r#   r   c  rD   z_SinglePassGatherer.resetr   c                 C  r   r4   r   r    r"   r"   r#   r   f  rD   z_SinglePassGatherer.collectN)r*   r   r+   r   r,   r-   r   r   )r+   r   r,   r-   r   r6   r-   r@   r   r6   r-   rE   rF   r6   r-   rE   r   r   r   )r%   r&   r'   rd   r2   rz   r   rC   rK   rN   r   r   r"   r"   r"   r#   rf   *  s    





rf   c                      sR   e Zd ZdZd fdd	ZdddZdddZd ddZdd Zd!ddZ	  Z
S )"r      r*   r   r+   r   r,   r-   c                   sR   t  || d | _tj|j|jd | jftj|j	d| _
g | _|jr'J dd S )Nr   dtypedevicez1DetailSinglePassGatherer does not support TBO yet)superrz   	_metadatar   zeros
num_layerschunked_prefill_size
_TOP_K_NUMint32r   _topk_ids_of_layer_misc_objectsenable_two_batch_overlapry   	__class__r"   r#   rz   n  s   
z"_DetailSinglePassGatherer.__init__r>   r   c                 C  s>   | j d u sJ t|j  |j  |j|jjd| _ d S )N)	input_ids	positionsextend_seq_lensforward_mode)	r   dictr   r   tolistr   extend_seq_lens_cpur   r   r   r"   r"   r#   r     s   z/_DetailSinglePassGatherer.on_forward_pass_startr6   r@   r   c                 C  s(   || j |d |jd d |jd f< d S )Nr      )r   shaper   r"   r"   r#   rC     s   &z+_DetailSinglePassGatherer.on_select_expertsrE   rF   c                 C  s4   | j t||  |  |  d d S )N)layer_idrH   rI   rJ   )r   r   r   r   r   r   r"   r"   r#   rK     s   


z3_DetailSinglePassGatherer.on_deepep_dispatch_normalc                 C  s   d| j d< | j  d | _d S )N.)r   r   clearr   r    r"   r"   r#   r     s   


z_DetailSinglePassGatherer.resetr   r   c                 C  sf   t | jd }t|| jj| jj| jd}tdi | j| jd d d |d d f  	 | j
|dS )Nr   )r   num_physical_expertsr   )topk_ids_of_layermisc_objectsglobal_physical_countr"   )lenr   +_convert_per_token_to_global_physical_countrl   r   r   r   r   cloner   r   )r!   
num_tokensr   r"   r"   r#   r     s   "
z!_DetailSinglePassGatherer.collectr\   r   r   r   r   )r%   r&   r'   r   rz   r   rC   rK   r   r   __classcell__r"   r"   r   r#   r   j  s    


r   c                      s8   e Zd Z fddZdddZd	d
 ZdddZ  ZS ) _LayerBasedCpuSinglePassGathererc                   s   t  j|i | i | _d S r4   )r   rz   _objects_of_layerr!   argsr   r   r"   r#   rz        
z)_LayerBasedCpuSinglePassGatherer.__init__r6   r-   objectsrF   c                 C  sR   d|  kr| j jk sJ  J || jv r"t| j| || j|< d S || j|< d S )Nr   )rl   r   r   	_list_sum)r!   r6   r   r"   r"   r#   _on_layer_data  s    

z/_LayerBasedCpuSinglePassGatherer._on_layer_datac                 C  s   | j   d S r4   )r   r   r    r"   r"   r#   r        z&_LayerBasedCpuSinglePassGatherer.resetpad_lenr   r   c                   s&    fddt jjD }t|S )Nc                   s"   g | ]}j |pd g  qS )r   )r   get)rg   layer_indexr   r!   r"   r#   
<listcomp>  s    zE_LayerBasedCpuSinglePassGatherer._collect_objects.<locals>.<listcomp>)rangerl   r   r   tensor)r!   r   datar"   r   r#   _collect_objects  s   

z1_LayerBasedCpuSinglePassGatherer._collect_objects)r6   r-   r   rF   )r   r-   r   r   )r%   r&   r'   rz   r   r   r   r   r"   r"   r   r#   r     s
    
	r   ar   br   c                 C  s   dd t | |ddD S )Nc                 S  s   g | ]\}}|| qS r"   r"   )rg   xyr"   r"   r#   r     s    z_list_sum.<locals>.<listcomp>T)strict)zip)r   r   r"   r"   r#   r     s   r   c                      s0   e Zd Zd fddZdd Zdd	d
Z  ZS ) _LayerBasedGpuSinglePassGathererenable_global_physical_expertsboolc                  sH   t  j|i | || _tj| jj|r| jjn| jjftj	dd| _
d S )Ncudar   )r   rz   _enable_global_physical_expertsr   r   rl   r   r   num_local_physical_expertsr-   _data)r!   r   r   r   r   r"   r#   rz     s   
z)_LayerBasedGpuSinglePassGatherer.__init__c                 C     d| j d< d S Nr   .r   r    r"   r"   r#   r     r   z&_LayerBasedGpuSinglePassGatherer.resetr   r   c                 C  s4   | j r| j}nt| j| j| jj| jjd}t|dS )Nr,   r   r   r   )r   r   '_convert_local_to_global_physical_countr   rl   r   r   r   )r!   r   r"   r"   r#   r     s   
z(_LayerBasedGpuSinglePassGatherer.collect)r   r   r   )r%   r&   r'   rz   r   r   r   r"   r"   r   r#   r     s    r   c                      &   e Zd Z fddZd	ddZ  ZS )
r   c                      t  j|i |ddi d S )Nr   Tr   rz   r   r   r"   r#   rz        z)_SelectExpertsSinglePassGatherer.__init__r6   r-   r@   r   c                 C  sD   |  }|dk}| j|d d f jd|| d | d d S )Nr   r   dimindexsrc)flattenr   scatter_add_masked_filllongr-   )r!   r6   r@   maskr"   r"   r#   rC     s
   
z2_SelectExpertsSinglePassGatherer.on_select_expertsr   )r%   r&   r'   rz   rC   r   r"   r"   r   r#   r     s    r   c                      s4   e Zd Z fddZdddZd fddZ  ZS )r   c                   s2   t  j|i | tj dkrtd d S d S )Nr   zDeepepNormalSinglePassGatherer gathers approximate statistics. If used with small batch size, consider using expert_distribution_recorder_mode=stat.)r   rz   r   distributedget_rankrw   rx   r   r   r"   r#   rz     s   z(_DeepepNormalSinglePassGatherer.__init__r6   r-   rE   rF   c                 C  s   t |tsJ | || d S r4   )
isinstancelistr   r   r"   r"   r#   rK     s   z9_DeepepNormalSinglePassGatherer.on_deepep_dispatch_normalr   r   c                   s6   t  j| jjd}t|| j| jj| jjd}t|dS )N)r   r  r  )r   r   rl   r   r  r   r   r   )r!   local_physical_countr   r   r"   r#   r     s   
z'_DeepepNormalSinglePassGatherer.collectr   r   )r%   r&   r'   rz   rK   r   r   r"   r"   r   r#   r     s    
r   c                      r  )
r   c                   r  )Nr   Fr  r   r   r"   r#   rz   -  r	  z,_DeepepLowLatencySinglePassGatherer.__init__r6   r-   rE   r   c                 C  s   | j |d d f  |7  < d S r4   r  r   r"   r"   r#   rN   0  s   zB_DeepepLowLatencySinglePassGatherer.on_deepep_dispatch_low_latencyr   )r%   r&   r'   rz   rN   r   r"   r"   r   r#   r   ,  s    r   r   r-   r   r   r   r   c           	      C  sp   |d d d | d d f  |d}|dk}|| d }| }tj||f|j|jd}|jd||d |S )Nr   r   r   r   r
  )	reshaper  r  r-   r   r   r   r   r  )	r   r   r   r   topk_ids_layer_majorr  r  r  ansr"   r"   r#   r   7  s   "r   r  r,   r   c           	      C  sN   | j }| j}| j\}}tj||f||d}| |d d || ||d  f< |S )Nr   r   )r   r   r   r   r   )	r  r,   r   r   r   r   r   _r  r"   r"   r#   r  L  s   
r  primaryc                   @  s`   e Zd Zed&d	d
Zed'ddZd(ddZdd Zd)ddZd*ddZ	dd  Z
d+d#d$Zd%S ),rr   r*   r   r+   r   r,   r-   r   '_Accumulator'c                 C  s   t | | ||S r4   )rr   	get_classr1   r"   r"   r#   r2   c  s   z_Accumulator.init_newType['_Accumulator']c                 C  s   t t ttd| j S )N)statr   per_passr   )_StatAccumulator_DetailAccumulatorr.   )r*   r"   r"   r#   r  m  s   z_Accumulator.get_classc                 C  s   || _ || _|| _d S r4   )rk   rl   r   ry   r"   r"   r#   rz   v  s   
z_Accumulator.__init__c                 C  s   t gS r4   !_SINGLE_PASS_GATHERER_KEY_PRIMARYr    r"   r"   r#   rt        z*_Accumulator.get_single_pass_gatherer_keysr:   Optional[str]c                 C  r   r4   r$  r9   r"   r"   r#   r     rD   z)_Accumulator.get_single_pass_gatherer_keyr=   r   r   r   r   r   r   c                 C  rA   r4   r"   r!   r=   r   r   r   r"   r"   r#   r     rL   z_Accumulator.appendc                 C  rA   r4   r"   r    r"   r"   r#   r     rD   z_Accumulator.resetrV   rW   c                 C  rA   r4   r"   rX   r"   r"   r#   r     rD   z_Accumulator.dumpN)r*   r   r+   r   r,   r-   r   r  )r*   r   r   r  r\   r:   r'  r=   r-   r   r   r   r   r   r   rb   )r%   r&   r'   rd   r2   r  rz   rt   r   r   r   r   r"   r"   r"   r#   rr   b  s    	



	rr   c                      sJ   e Zd Z fddZd fddZ fddZdddZdddZ  ZS ) _UtilizationRateAccumulatorMixinc                   sb   t  j|i | | jj| _| jr/g d| _t| jd| _tj	
 | _t| jj| _d| _d S d S )N)
   d   i  )maxlensr   )r   rz   rk   rv   _enablewindow_sizes_DequeCollection_historyr   r  r  r   r   rl   ep_size_expert_dispatch_collector"_metric_heatmap_collection_counterr   r   r"   r#   rz     s   


z)_UtilizationRateAccumulatorMixin.__init__r=   r-   r   r   r   r   r   r   c                   s.   t  |||| | jr| ||d |S d S Nr   )r   r   r/  _append_utilization_rater(  r   r"   r#   r     s   
z'_UtilizationRateAccumulatorMixin.appendc                   s"   t    | jr| j  d S d S r4   )r   r   r/  r2  r   r    r   r"   r#   r     s   
z&_UtilizationRateAccumulatorMixin.reset!single_pass_global_physical_countr   c                 C  s   t || jjd}|| jj}tjj|dtjj	j
d | jdkrv| | tt|}tj rEtd| jd| t|d|d< d S | }| j| |  }td| d	|d
dddd | j  D  d|  d S d S )Nnum_gpur   )dstopzhi self._rank=z utilization_rate_gpu=)r   metricsz&[Expert Balancedness] forward_pass_id=z current_pass_balancedness=.03f  c                 s  s(    | ]\}}d | d|ddV  qdS )last_z_average_balancedness=r>  r?  Nr"   )rg   sizer   r"   r"   r#   	<genexpr>  s   & zL_UtilizationRateAccumulatorMixin._append_utilization_rate.<locals>.<genexpr>z gpu_physical_count_sum=)compute_gpu_physical_countrl   r3  r   rk   r   r   r  reduceReduceOpSUMr   _handle_metric_eplb_heatmapmeancompute_utilization_rater   &SGLANG_ENABLE_EPLB_BALANCEDNESS_METRICr   printr   itemr2  r   sumrw   rx   joinr   )r!   r=   r8  r   gpu_physical_countutilization_rate_gpuutilization_rate_cpugpu_physical_count_sumr"   r"   r#   r7    sB   


z9_UtilizationRateAccumulatorMixin._append_utilization_raterP  c                 C  s   t dd}|dkr`| j| dkr`t| jjD ]I}| jjjt|d}| jj	t
|jd ks<J d| jj	dt
|jt| jj	D ]}|||f }|dkr^|j||  |j| | qBq|  jd7  _d S )N'SGLANG_EPLB_HEATMAP_COLLECTION_INTERVALr   )layerr   z'self._expert_location_metadata.ep_size=z, len(count_of_layer._buckets)=)r   r5  r   rl   r   r4  eplb_gpu_physical_countlabelsr   r3  r   _buckets_suminc)r!   rP  intervalr6   count_of_layergpu_rankcountr"   r"   r#   rH    s(   
z<_UtilizationRateAccumulatorMixin._handle_metric_eplb_heatmapr*  )r=   r-   r8  r   r   r   )rP  r   )	r%   r&   r'   rz   r   r   r7  rH  r   r"   r"   r   r#   r+    s    
+r+  c                   @  s0   e Zd ZdddZdd Zdd ZdddZdS )r1  r.  rF   c                 C  s   dd |D | _ d S )Nc                 S  s   g | ]}t |d qS ))maxlenr   )rg   r_  r"   r"   r#   r     s    z-_DequeCollection.__init__.<locals>.<listcomp>	_dequeues)r!   r.  r"   r"   r#   rz     s   z_DequeCollection.__init__c                 C  s   | j D ]}|| qd S r4   )ra  r   )r!   r   dr"   r"   r#   r     s   
z_DequeCollection.appendc                 C  s   | j D ]}|  qd S r4   )ra  r   )r!   rb  r"   r"   r#   r     s   

z_DequeCollection.clearr   Dict[int, float]c                 C  s   dd | j D S )Nc                 S  s    i | ]}|j t|t| qS r"   )r_  rN  r   )rg   rb  r"   r"   r#   rj     s     z)_DequeCollection.mean.<locals>.<dictcomp>r`  r    r"   r"   r#   rI       z_DequeCollection.meanN)r.  rF   )r   rc  )r%   r&   r'   rz   r   r   rI  r"   r"   r"   r#   r1    s
    
r1  c                      sZ   e Zd Z fddZ fddZd fddZd fddZ fddZdddZ  Z	S )r#  c                   s   t  j|i | g | _d S r4   )r   rz   _recordsr   r   r"   r#   rz   
  r   z_DetailAccumulator.__init__c                   s   	 t  S r4   )r%  r   rt   r    r   r"   r#   rt     s   
z0_DetailAccumulator.get_single_pass_gatherer_keysr:   r'  c                   s   	 t |S r4   )r%  r   r   r9   r   r"   r#   r     s   z/_DetailAccumulator.get_single_pass_gatherer_keyr=   r-   r   r   r   r   r   r   c                   sT   t  |||| dd   fdd| D }| jtd|| j|d| d S )Nc                 S  s   t | tjr|   S | S r4   )r  r   Tensorr   r   )objr"   r"   r#   _process_object!  s   z2_DetailAccumulator.append.<locals>._process_objectc                   s   i | ]	\}}| |qS r"   r"   )rg   rh   vrh  r"   r#   rj   &  s    z-_DetailAccumulator.append.<locals>.<dictcomp>)r=   r,   r   r"   )r   r   r   re  r   r   )r!   r=   r   r   r   single_pass_data_processedr   rj  r#   r     s   
z_DetailAccumulator.appendc                   s   t    | j  d S r4   )r   r   re  r   r    r   r"   r#   r   3     
z_DetailAccumulator.resetrV   rW   c                 C  s@   |dksJ t | j| jjd}tdt  d| j d| d S )Nr   )recordslast_physical_to_logical_mapexpert_distribution_recorder_r  .pt)r   re  rl   physical_to_logical_map_dump_to_filetimer   r   r"   r"   r#   r   7  s   z_DetailAccumulator.dumpr)  r*  rb   )
r%   r&   r'   rz   rt   r   r   r   r   r   r"   r"   r   r#   r#  	  s    r#  c                      sH   e Zd Z fddZd fddZ fddZdddZdd Z  ZS )r"  c                   sF   t  j|i | tj| jj| jjf| jjt	j
| jjd| _d| _d S )N
item_shapebuffer_sizer   r   T)r   rz   _Bufferr2   rl   r   r   rk   (expert_distribution_recorder_buffer_sizer   r   r   '_global_physical_count_of_buffered_step_first_dumpr   r   r"   r#   rz   D  s   

z_StatAccumulator.__init__r=   r-   r   r   r   r   r   r   c                   s&   t  |||| | j|d  d S r6  )r   r   ry  r(  r   r"   r#   r   R  s   z_StatAccumulator.appendc                   s   t    | j  d S r4   )r   r   ry  r    r   r"   r#   r   _  rl  z_StatAccumulator.resetrV   rW   c                 C  s   t | j | jj| jj| jjd}| jrd| _t	 
  tjj|tjjjd t| j||  d}|dkrJ| jdkrHtdt  d| d S d S |d	krP|S t)
N)r   num_logical_expertsrq  F)r<  )r,   logical_count$average_utilization_rate_over_windowr   r   ro  rp  r   )/_convert_global_physical_count_to_logical_country  get_allrl   r   r{  rq  rz  r   r   empty_cacher  
all_reducerF  rG  r   r   $_get_global_average_utilization_raterr  rs  r   )r!   rV   logical_count_of_buffered_stepr   r"   r"   r#   r   c  s0   

z_StatAccumulator.dumpc                 C  s   | j rt| jjdrd S | jdkr1| j }| jd }||v r$|| nd}t	j
|gt	jdd}n	t	jdt	jdd}t	jj|dd | S )Ng      ?r   r   r   r   r   )r  )r/  mathiscloserk   *eplb_min_rebalancing_utilization_thresholdr   r2  rI  r0  r   r   float32emptyr  	broadcastrM  )r!   utilization_mean_rateswindow_indexr}  avg_rate_tensorr"   r"   r#   r    s&   



z5_StatAccumulator._get_global_average_utilization_rater*  rb   )	r%   r&   r'   rz   r   r   r   r  r   r"   r"   r   r#   r"  C  s    
r"  c                 C  sP   t tj }||  }td|  | s|jddd t	|t
| d S )NzWrite expert distribution to T)parentsexist_ok)r   r   'SGLANG_EXPERT_DISTRIBUTION_RECORDER_DIRr   rw   rx   existsmkdirr   saver   )namer   save_dirpath_outputr"   r"   r#   rr    s   rr  c                   @  s6   e Zd ZedddZdd	d
ZdddZdd ZdS )rw  ru  r   rv  r-   c                 C  s&   |dk rt | ||dS t| |||dS )Nr   r   )_InfiniteBuffer_CircularBufferrt  r"   r"   r#   r2     s   z_Buffer.init_newr   r   c                 C  r   r4   r   r!   r   r"   r"   r#   r     rD   z_Buffer.appendr   c                 C  r   r4   r   r    r"   r"   r#   r    rD   z_Buffer.get_allc                 C  r   r4   r   r    r"   r"   r#   r     rD   z_Buffer.resetNru  r   rv  r-   r   r   r   r   )r%   r&   r'   rd   r2   r   r  r   r"   r"   r"   r#   rw    s    

rw  c                   @  s2   e Zd ZdddZdd	d
ZdddZdd ZdS )r  ru  r   rv  r-   c                 C  s$   t j|g|R ||d| _d| _d S )Nr   r   )r   r   _buffer_curr_index)r!   ru  rv  r   r   r"   r"   r#   rz     s   
z_CircularBuffer.__init__r   r   c                 C  s&   || j | j< | jd t| j  | _d S )Nr   )r  r  r   r  r"   r"   r#   r     s   z_CircularBuffer.appendr   c                 C  r   r4   r  r    r"   r"   r#   r    r&  z_CircularBuffer.get_allc                 C  r   r  r  r    r"   r"   r#   r     r   z_CircularBuffer.resetNr  r  r  r%   r&   r'   rz   r   r  r   r"   r"   r"   r#   r    s
    


r  c                   @  s2   e Zd ZdddZdddZdd
dZdd ZdS )r  ru  r   c                 C  s*   || _ tjdg|R ||d| _d| _d S )N   r   r   )_item_shaper   r   r  _size)r!   ru  r   r   r"   r"   r#   rz     s   
z_InfiniteBuffer.__init__r   r   c                 C  st   t | j}| jj}| jj}| j|kr+tjd| g| jR ||d}| j|d |< || _|| j| j< |  jd7  _d S )N   r   r   )r   r  r   r   r  r   r   r  )r!   r   curr_buffer_sizer   r   
new_bufferr"   r"   r#   r     s   

z_InfiniteBuffer.appendr   c                 C  s   | j d | j S r4   r  r  r    r"   r"   r#   r    rd  z_InfiniteBuffer.get_allc                 C  s   d| j d< d| _d S r  r  r    r"   r"   r#   r     s   

z_InfiniteBuffer.resetN)ru  r   r  r  r  r"   r"   r"   r#   r    s
    


r  r   r{  rq  c           	      C  sZ   | j \}}}| j}| j}tj|||f||d}|jd|d|ddtj	| d |S )Nr   r  r   r   r
  )
r   r   r   r   r   r  	unsqueezeexpandr   int64)	r   r   r{  rq  	dim_extrar  r   r   r|  r"   r"   r#   r~    s   
r~  physical_count_of_whateverr:  c                 C  s   t j| dd|dS )z=output: gpu_physical_count_of_batch (..., num_layer, num_gpu)zC... num_layer (num_gpu num_expert_per_gpu) -> ... num_layer num_gpurN  r9  )einopsrE  )r  r:  r"   r"   r#   rD    s   rD  gpu_physical_count_of_batchc                 C  s4   |   } t| dd}t| dd}|d |d  S )z)output: utilization_rate (..., num_layer)z&... num_layer num_gpu -> ... num_layermaxrI  gh㈵>)floatr  rE  )r  max_gpu_physical_countavg_gpu_physical_countr"   r"   r#   rJ  	  s   rJ  )r   r   r   r   r   r   )
r   r-   r   r-   r   r-   r   r   r   r   )
r  r   r,   r-   r   r-   r   r-   r   r   )r   r   r   r-   r{  r-   rq  r   )r  r   r:  r-   )r  r   )K
__future__r   loggingr  rs  abcr   collectionsr   
contextlibr   dataclassesr   pathlibr   typingr	   r
   r   r   r   r   r   r   r  r   torch.distributedsglang.srt.environr   sglang.srt.metrics.collectorr   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.server_argsr   sglang.srt.utilsr   r   sglang.srt.eplb.expert_locationr   	getLoggerr%   rw   rW   r   r)   r0   r/   r   r(   r   r   rf   r   r   r   r   r   r   r   r   r  r%  rr   r+  r1  r#  r"  rr  rw  r  r  r~  rD  rJ  r"   r"   r"   r#   <module>   sn   (
I 
@T
#!

4c:X	

