o
    پiVR                     @  sR  U d dl mZ d dlZd dlZd dlZd dlmZ d dlmZ d dl	m
Z
mZmZ d dlZd dlZd dlm  mZ d dlmZ d dlmZ e
rTd dlmZ d d	lmZ eeZeG d
d dZdade d< dd Z!dd Z"dDddZ#dd Z$	dEdFd#d$Z%dGd)d*Z&dHd-d.Z'dId0d1Z(dJd5d6Z)dKd<d=Z*eG d>d? d?Z+dLdBdCZ,dS )M    )annotationsN)	dataclass)Path)TYPE_CHECKINGListOptional)eplb_algorithms)get_model_architecture)ModelConfig)
ServerArgsc                   @  s   e Zd ZU ded< ded< ded< ded< ded< ded< ed6ddZed6ddZed6ddZed6ddZedd Z	dd Z
ed7ddZe	d8d7dd Zed9d"d#Zed:d$d%Zed;d'd(Zd<d-d.Z	/d=d>d4d5ZdS )?ExpertLocationMetadatatorch.Tensorphysical_to_logical_mapphysical_to_logical_map_cpulogical_to_all_physical_maplogical_to_all_physical_map_cpu%logical_to_all_physical_map_num_validzOptional[torch.Tensor]%logical_to_rank_dispatch_physical_mapreturnintc                 C     | j jd S Nr   r   shapeself r   S/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/eplb/expert_location.py
num_layers2      z!ExpertLocationMetadata.num_layersc                 C  r   N   r   r   r   r   r   num_physical_experts6   r   z+ExpertLocationMetadata.num_physical_expertsc                 C  s"   t | j| j\}}|dksJ |S r   )divmodr"   ep_size)r   ans	remainderr   r   r   num_local_physical_experts:   s   z1ExpertLocationMetadata.num_local_physical_expertsc                 C  r   r    )r   r   r   r   r   r   num_logical_experts@   r   z*ExpertLocationMetadata.num_logical_expertsc                 C  s
   t j S N)torchdistributedget_world_sizer   r   r   r   r$   D   s   
zExpertLocationMetadata.ep_sizec                 C  s^   | j j\}}| jj\}}}| jj\}}||  kr|ks!J  J ||ks'J ||ks-J d S r)   )r   r   r   r   )r   num_layers_0num_physical_experts_0num_layers_1num_logical_experts_0num_physical_experts_1num_layers_2num_logical_experts_1r   r   r   __post_init__I   s   z$ExpertLocationMetadata.__post_init__server_argsr   model_configr
   moe_ep_rankc           	      C  s^   t | |}|du rdS |d }|d }|j}|j}td||d| }t j| |||dS )zDTrivial location - logical expert i corresponds to physical expert iNr"    model_config_for_expert_locationr   r!   )r   r7   )r   _init_commonr   r(   r*   arangerepeatinit_by_mapping)	r5   r6   r7   commonr"   r8   r   r(   r   r   r   r   init_trivialW   s"   z#ExpertLocationMetadata.init_trivialNc                 C  sp   t |tjst|}|| j}t| |}|d u rd S |d }t| ||j	|d |d}tj
| |d ||dS )Nr8   r$   )r5   r   r(   r$   r7   r5   r$   r   r   )
isinstancer*   Tensortensortodevicer   r9   $_compute_logical_to_all_physical_mapr(   	_init_raw)r5   r6   r   r7   r=   r8   r   r   r   r   r<   r   s(   
z&ExpertLocationMetadata.init_by_mappinglogical_countc                 C  s   t |tjst|}t|jdkr|d}|| j}t	
| |}|d u r)d S |d }|d }|j}| j}tj||||d  ||tj| j||dd\}}	}
t	j| |d || j|	| jdS )	N   r   r8   r"   r$   )raw_algorithm
num_groups	num_nodes)tokens_per_expertr"   r'   rJ   rK   	algorithmr?   )r@   r*   rA   rB   lenr   	unsqueezerC   rD   r   r9   rJ   nnodesr   rebalance_expertscompute_algorithmeplb_algorithmrF   )r5   r6   rG   r=   r8   r"   rJ   rK   r   r   expert_countr   r   r   init_by_eplb   sB   



z#ExpertLocationMetadata.init_by_eplbc                 C  sP   t |}|d u rd S |j| j }| j}|| dksJ || }t||||dS )Nr   )r8   r"   r'   r$   )ModelConfigForExpertLocationfrom_model_configr(   ep_num_redundant_expertsr$   dict)r5   r6   r8   r"   r$   r'   r   r   r   r9      s"   z#ExpertLocationMetadata._init_commonr$   c                 C  s   |j \}}tj|d||j d  fdd}tj|dkdd}t|| || || jdkr<t| |||tj	
 | ddS d dS )Nr   value)dimstatic)r5   r   r$   r"   ep_rankr   r   r   r   r   r   )r   Fpadr*   count_nonzeror   cpuep_dispatch_algorithm-compute_logical_to_rank_dispatch_physical_mapr+   get_rank)r5   r$   r   r   _r"   "logical_to_all_physical_map_paddedr   r   r   r   rF      s4   


z ExpertLocationMetadata._init_rawother'ExpertLocationMetadata'update_layer_ids	List[int]c                   s   dD ]}t | |t ||ksJ qdD ]I}t ||}t | |}|d u|d uks)J |d ur\t fddt| jD }|jdgdg| d    }|j|jdd}t	||||d	< qd S )
N)r$   r`   c                   s   g | ]}| v qS r   r   ).0irl   r   r   
<listcomp>  s    z1ExpertLocationMetadata.update.<locals>.<listcomp>rZ   r!   T)non_blocking.)
getattrr*   rB   ranger   viewr]   rC   rD   where)r   rj   rl   fieldother_field
self_fieldmask_updater   rp   r   update   s   

zExpertLocationMetadata.updateFlayer_idlogical_expert_idrequire_global_expertsboolc                 C  sB   |r| j | jd }tt||| jS dd | j ||f  D S )NrZ   c                 S     g | ]}|d kr|qS rZ   r   rn   physical_expert_idr   r   r   rq   ,  
    zBExpertLocationMetadata.logical_to_all_physical.<locals>.<listcomp>)r   r   listrt   r(   tolist)r   r|   r}   r~   r"   r   r   r   logical_to_all_physical  s   
z.ExpertLocationMetadata.logical_to_all_physical)r   r   )r5   r   r6   r
   r7   r   r)   )r5   r   r6   r
   rG   r   )r5   r   r6   r
   )r5   r   r$   r   r   r   r   r   )rj   rk   rl   rm   )F)r|   r   r}   r   r~   r   r   rm   )__name__
__module____qualname____annotations__propertyr   r"   r'   r(   r$   r4   staticmethodr>   r<   rU   r9   rF   r{   r   r   r   r   r   r   &   s@   
 
+
(#r    Optional[ExpertLocationMetadata] _global_expert_location_metadatac                   C  s   t S r)   r   r   r   r   r   #get_global_expert_location_metadata8  s   r   c                 C  s   t d u sJ | a d S r)   r   r[   r   r   r   #set_global_expert_location_metadata<  s   r   r5   r   r   r   r(   r   r$   r7   c              	     s   |j \}} fddt|D }t|D ]}t|D ]}	|||	f  }
|| |
 |	 qq|d urg| j| j }|| }|| }t|D ]!}t D ]}
t|| |
 ||||d}|dkre|g|| |
< qKqEt|dd}tj	||j
dS )Nc                   s   g | ]}d d t  D qS )c                 S  s   g | ]}g qS r   r   rn   rh   r   r   r   rq   N  s    zC_compute_logical_to_all_physical_map.<locals>.<listcomp>.<listcomp>)rt   r   r(   r   r   rq   M  s    z8_compute_logical_to_all_physical_map.<locals>.<listcomp>candidate_physical_expert_idsnum_local_gpu_physical_expertsr7   num_gpus_per_nodenum_local_node_physical_expertsrZ   )	pad_value)rD   )r   rt   itemappendr$   rP   _find_nearest_expert_pad_nested_arrayr*   rB   rD   )r5   r   r(   r$   r7   r   r"   r   r|   r   r}   r   r   r   nearest_expertr   r   r   rE   B  sT   
	
	rE   c                   s*   t dd | D   fdd| D }|S )Nc                 s  s"    | ]}|D ]}t |V  qqd S r)   rN   )rn   outerinnerr   r   r   	<genexpr>  s     z$_pad_nested_array.<locals>.<genexpr>c                   s    g | ]} fd d|D qS )c                   s"   g | ]}|g t |   qS r   r   )rn   r   max_lenr   r   r   rq     s   " z0_pad_nested_array.<locals>.<listcomp>.<listcomp>r   )rn   r   r   r   r   rq     s    z%_pad_nested_array.<locals>.<listcomp>)max)arrr   paddedr   r   r   r   ~  s
   r   *   r   r"   r_   seedc              
   C  s  t |}|| }| j| j }|| }	|j\}
}}|j}tj||
|fd|d}t|
D ]C}t|D ]<}t	|||}|d d ||f }t|D ]}t
|||||	d||< qFt|dk }tjt|||d|d||dk< q1q+t|dksxJ |j}||d d d d f |S )NrZ   )size
fill_valuedtyper   )kr)r   )randomRandomr$   rP   r   r   r*   fullrt   _logical_to_all_physical_rawr   sumr   rB   _fair_choicesallrD   rC   )r5   r   r$   r"   r_   r   r   r   r   r   r   r(   rh   r   r   r|   r}   r   output_partialr7   
num_remainrD   r   r   r   rf     sH   
	rf   r|   r}   r   rm   c                 C  s   dd | ||f   D S )Nc                 S  r   r   r   r   r   r   r   rq     r   z0_logical_to_all_physical_raw.<locals>.<listcomp>)r   )r   r|   r}   r   r   r   r     s   r   r   r   c                 C     | | S r)   r   )r   r   r   r   r   "_compute_gpu_id_of_physical_expert     r   num_local_host_physical_expertsc                 C  r   r)   r   )r   r   r   r   r   #_compute_node_id_of_physical_expert  r   r   r   r   r   c                   sp   t | dkr
| d S  fdd| D }t |dkr|d S  | fdd| D }t |dkr6|d S dS )Nr!   r   c                      g | ]}t | kr|qS r   )r   r   )r7   r   r   r   rq         z(_find_nearest_expert.<locals>.<listcomp>c                   r   r   )r   r   )	node_rankr   r   r   rq     r   rZ   r   )r   r   r7   r   r   same_gpu_physical_expert_idssame_node_physical_expert_idsr   )r7   r   r   r   r   r     s   r   r   r   r   r   random.Randomc                 C  s6   t |t| \}}| | |j| |d }|| |S )N)r   )r#   rN   sampleshuffle)r   r   r   quotientr&   r%   r   r   r   r     s   
r   c                   @  s8   e Zd ZU ded< ded< dZded< edd	d
ZdS )rV   r   r   r(   NzOptional[int]rJ   r6   r
   c                 C  s&   t | \}}t|dr|| jS d S )N$get_model_config_for_expert_location)r	   hasattrr   	hf_config)r6   model_classrh   r   r   r   rW     s   
z.ModelConfigForExpertLocation.from_model_config)r6   r
   )r   r   r   r   rJ   r   rW   r   r   r   r   rV     s   
 rV   r6   r
   c                 C  s   | j }|dkrt| ||S |drtj|dd}n|dr*tt|	 }nt|}d|v rFt
d tj| |fi |d|iS d	|v rYt
d
 tj| ||d	 dS tdt| d)Ntrivialz.ptT)weights_onlyz.jsonr   zOinit_expert_location from init_by_mapping using ServerArgs.init_expert_locationr7   rG   zLinit_expert_location from init_by_eplb using ServerArgs.init_expert_location)rG   z<Unknown init_expert_location format (list(data_dict.keys())=))init_expert_locationr   r>   endswithr*   loadjsonloadsr   	read_textloggerinfor<   rU   NotImplementedErrorr   keys)r5   r6   r7   data	data_dictr   r   r   (compute_initial_expert_location_metadata  s@   



r   )
r5   r   r   r   r(   r   r$   r   r7   r   )r   )r5   r   r   r   r$   r   r"   r   r_   r   r   r   )r|   r   r}   r   r   rm   )r   r   r   r   r   r   )r   r   r   r   r   r   )r   rm   r   r   r7   r   r   r   r   r   r   r   )r   r   r   r   r   r   r   r   )r5   r   r6   r
   r7   r   r   r   )-
__future__r   r   loggingr   dataclassesr   pathlibr   typingr   r   r   r*   torch.distributedtorch.nn.functionalnn
functionalra   sglang.srt.eplbr   sglang.srt.model_loaderr	   sglang.srt.configs.model_configr
   sglang.srt.server_argsr   	getLoggerr   r   r   r   r   r   r   rE   r   rf   r   r   r   r   r   rV   r   r   r   r   r   <module>   sF   
  
<
6



(