o
    پi                     @   sz   d dl Z d dlZd dlmZmZ d dlZd dlmZ d dl	m
Z
 er(d dlmZ e eZG dd dZdefd	d
ZdS )    N)TYPE_CHECKINGList)'get_global_expert_distribution_recorder)ExpertLocationMetadata)ModelRunnerc                       sT   e Zd Zd fddZdd Zdd Zd	d
 Zdd Zdeee	  fddZ
  ZS )EPLBManagermodel_runnerr   c                    sx   t    || _|j| _| jj| _| jj| _| jj| jj	ks"J dt
 js+t
   td| j d |  | _d S )Nz[eplb_rebalance_num_iterations must be greater than expert_distribution_recorder_buffer_sizez1[EPLBManager] system started, will rebalance per z iterations.)super__init___model_runnerserver_args_server_argseplb_rebalance_layers_per_chunk_rebalance_layers_per_chunkeplb_rebalance_num_iterations_rebalance_num_iterations(expert_distribution_recorder_buffer_sizer   	recordingstart_recordloggerinfo_entrypoint_main_generator)selfr   	__class__ P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/eplb/eplb_manager.pyr
      s"   


zEPLBManager.__init__c                 C   s   t | j d S N)nextr   )r   r   r   r   on_forward_pass_end)   s   zEPLBManager.on_forward_pass_endc                 c   s*    	 t | jD ]}d V  q|  E d H  qr   )ranger   	rebalance)r   _r   r   r   r   -   s   zEPLBManager._entrypointc                 c   s    t d | jd u }|rt   t }t jdd}|d }|d }| 	|s-d S t
| j| jj|}|  }t|D ]\}}	t|dkrLd V  | jj||	d q?d}
|rnt   t }|
d	|| d
d7 }
t |
 d S )Nz[EPLBManager] rebalance startobject)output_modelogical_count$average_utilization_rate_over_window   )update_layer_idsz[EPLBManager] rebalance endz time=z.3fs)r   r   r   torchget_device_modulesynchronizetimer   dump_record_check_rebalance_neededr   init_by_eplbr   r   model_config _compute_update_layer_ids_chunks	enumeratelenupdate_expert_location)r   enable_timing
time_startdump_record_outputr&   r'   expert_location_metadataupdate_layer_ids_chunkschunk_indexr)   msgtime_endr   r   r   r"   4   s@   


zEPLBManager.rebalancec                 C   s>   |d u rdS || j jkrtd|dd| j jd dS dS )NTz>[EPLBManager] Skipped ep rebalancing: current GPU utilization z.2fz > minimum rebalance threshold F)r   *eplb_min_rebalancing_utilization_thresholdr   r   )r   r'   r   r   r   r0   ]   s   z#EPLBManager._check_rebalance_neededreturnc                 C   s0   t t| jjj }| jpd}tt||dS )Ni@B )
chunk_size)sortedlistr   modelrouted_experts_weights_of_layerkeysr   _chunk_list)r   all_layer_idsrA   r   r   r   r3   l   s
   
z,EPLBManager._compute_update_layer_ids_chunks)r   r   )__name__
__module____qualname__r
   r    r   r"   r0   r   intr3   __classcell__r   r   r   r   r      s    )r   itemsc                 c   s.    t dt| |D ]}| |||  V  q	d S )Nr   )r!   r5   )rN   rA   start_indexr   r   r   rG   t   s   rG   )loggingr.   typingr   r   
torch.cudar+   #sglang.srt.eplb.expert_distributionr   sglang.srt.eplb.expert_locationr   &sglang.srt.model_executor.model_runnerr   	getLoggerrI   r   r   rG   r   r   r   r   <module>   s    
d