o
    پi>                     @   sv  d dl mZmZmZ d dlZd dlmZ d dlmZ d dl	m
Z
mZmZ d dlmZmZmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z" d dl#m$Z$ d dl%m&Z&m'Z' d dl(m)Z)m*Z* G dd dej+Z,G dd dej+Z-G dd dej+Z.G dd dej+Z/G dd dej+Z0G dd dej+Z1G dd dej+Z2e2Z3dS )    )IterableOptionalTupleN)
DbrxConfig)get_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)	fused_moe)MoeRunnerConfig)TopK)QuantizationConfig)RadixAttention)get_rope)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loadermaybe_remap_kv_scale_name)
add_prefixset_weight_attrsc                       sP   e Zd ZdZ		ddedeej def fddZ	d	ej
d
ej
fddZ  ZS )
DbrxRouterzXA Router implementation for DBRX that returns logits for each expert
    per token.
    N configparams_dtypeprefixc                    s@   t    t | _|jj| _|j| _t| j| jd|d d| _	d S )NF)biasr   quant_config)
super__init__r   tp_size
ffn_configmoe_num_expertsnum_total_expertsd_modelr
   layer)selfr   r   r   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/dbrx.pyr#   ;   s   

zDbrxRouter.__init__hidden_statesreturnc                 C   s   |  |\}}|S N)r)   )r*   r/   router_logits_r-   r-   r.   forwardM   s   zDbrxRouter.forwardNr   )__name__
__module____qualname____doc__r   r   torchdtypestrr#   Tensorr4   __classcell__r-   r-   r+   r.   r   6   s    r   c                	       st   e Zd ZdZ			ddedee deej de	f fdd	Z
d
ejdejde	fddZdejdejfddZ  ZS )DbrxExpertszA tensor-parallel MoE implementation for DBRX.

    Each expert's weights are sharded across all ranks and a fused MoE
    kernel is used for the forward pass, and finally we reduce the outputs
    across ranks.
    Nr   r   r!   r   r   c              	      s   t    t | _|jj| _|jj| _|j	| _	|jj
| j | _|d u r't }|| _t|| j| _t| jdd| _tdd| _ttj| jd| j | j	d| jd| _ttj| j| j	| jd| jd| _t| jd| ji t| jd| ji d S )NT)renormalize)inplace   cuda)devicer;   weight_loader)r"   r#   r   r$   r%   r&   r'   	moe_top_ktop_kr(   ffn_hidden_sizeintermediate_sizer:   get_default_dtyper   r   routerr   topkr   moe_runner_confignn	Parameteremptywsw2sr   rE   )r*   r   r!   r   r   r+   r-   r.   r#   Z   sV   


	
zDbrxExperts.__init__paramloaded_weightweight_namec                 C   s$  t  }|j}| j}t|| |d | }|dr=t|d| j| j | jg}|d d |d d f |d d d|d d f< |drht|d| j| j | jg}|d d |d d f |d d |d| d d f< |drt|d| j| j | jg	dd}|d d d d |f |d d < d S d S )N   w1r   v1rB   w2)
r   datarI   sliceendswithr:   reshaper$   r(   	transpose)r*   rS   rT   rU   tp_rank
param_data
shard_sizeshardr-   r-   r.   rE      s2   
,
0
"zDbrxExperts.weight_loaderr/   r0   c                 C   sb   |j \}}|d| j}| |}| ||}t|| j| j|| j}| j	dkr+t
|}|||S )NrX   rV   )shapeviewr(   rK   rL   r   rQ   rR   rM   r$   r   )r*   r/   
num_tokenshidden_sizer2   topk_outputfinal_hidden_statesr-   r-   r.   r4      s   


zDbrxExperts.forward)NNr   )r6   r7   r8   r9   r   r   r   r:   r;   r<   r#   rN   rO   r=   rE   r4   r>   r-   r-   r+   r.   r?   R   s,    
8
r?   c                	       Z   e Zd Z			ddededee def fdd	Zd
e	j
de	j
dede	j
fddZ  ZS )DbrxAttentionr   Nr   r   layer_idr!   r   c              
      sp  t    |j| _|j| _| j| j | _|jj| _|jj	| _	|jj
| _
|j| _t| j| j| j| jd|td|d| _t| j| jd|td|d| _t| j| j| jt| j
dd| _t }|| _| j| dkshJ | j| | _| j|kr}| j| dks|J n	|| j dksJ td| j| | _| j| j | _| j| j | _| jd	 | _t| j| j| j| j||td
|d| _d S )NFWqkv)r    r!   r   out_projT)
rotary_dimmax_positionbaseis_neox_styler   rV   g      attn)num_kv_headsrl   r!   r   )r"   r#   r(   n_headstotal_num_headshead_dimattn_config
kv_n_headstotal_num_kv_headsclip_qkv
rope_thetamax_seq_lenrp   r	   r   rm   r   rn   r   int
rotary_embr   r$   	num_headsmaxrt   q_sizekv_sizescalingr   rs   )r*   r   rl   r!   r   tp_world_sizer+   r-   r.   r#      sf   



	
zDbrxAttention.__init__position_idsr/   forward_batchr0   c           
      C   s   |  |\}}| jd ur|j| j | jd |j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}}|S )N)minr   rX   )dim)	rm   r{   clamp_splitr   r   r   rs   rn   )
r*   r   r/   r   qkvr3   qkvattn_outputr-   r-   r.   r4     s   
 zDbrxAttention.forwardr   Nr   r6   r7   r8   r   r~   r   r   r<   r#   r:   r=   r   r4   r>   r-   r-   r+   r.   rk      s,    Crk   c                
       sd   e Zd Z			ddededee def fdd	Zd
e	j
de	j
dedee	j
e	j
f fddZ  ZS )DbrxFusedNormAttentionr   Nr   r   rl   r!   r   c                    sJ   t    |j| _t|||td|d| _t| j| _t| j| _	d S )Nrs   r!   r   )
r"   r#   r(   rk   r   rs   rN   	LayerNormnorm_1norm_2r*   r   rl   r!   r   r+   r-   r.   r#     s   
zDbrxFusedNormAttention.__init__r   r/   r   r0   c                 C   s<   |}|  |}| j|||d}|| }|}| |}||fS N)r   r/   r   )r   rs   r   )r*   r   r/   r   residualxr-   r-   r.   r4   )  s   

zDbrxFusedNormAttention.forwardr   )r6   r7   r8   r   r~   r   r   r<   r#   r:   r=   r   r   r4   r>   r-   r-   r+   r.   r     s,    r   c                	       rj   )	DbrxBlockr   Nr   r   rl   r!   r   c                    s4   t    t|||td|d| _t||d| _d S )Nnorm_attn_normr   )r!   )r"   r#   r   r   r   r?   ffnr   r+   r-   r.   r#   =  s   
zDbrxBlock.__init__r   r/   r   r0   c                 C   s*   | j |||d\}}| |}|| }|S r   )r   r   )r*   r   r/   r   r   r-   r-   r.   r4   M  s   

zDbrxBlock.forwardr   r   r-   r-   r+   r.   r   <  s,    r   c                       s^   e Zd Z		ddedee def fddZ	ddej	d	ej	d
e
dej	dej	f
ddZ  ZS )	DbrxModelNr   r   r!   r   c                    s   t    t j j| _t fddt j	D | _
tj jdd| _|  D ]}t|dr@t|jtjr@|dd  q,d S )Nc              	      s(   g | ]}t  |td | dqS )zblocks.r   )r   r   ).0ir   r   r!   r-   r.   
<listcomp>j  s    z&DbrxModel.__init__.<locals>.<listcomp>gh㈵>)epsr    )r"   r#   r   
vocab_sizer(   wterN   
ModuleListrangen_layersblocksr   norm_fmoduleshasattr
isinstancer    rO   register_parameter)r*   r   r!   r   moduler+   r   r.   r#   ^  s    
zDbrxModel.__init__	input_idsr   r   input_embedsr0   c                 C   sP   |d u r
|  |}n|}tt| jD ]}| j| }||||}q| |}|S r1   )r   r   lenr   r   )r*   r   r   r   r   r/   r   blockr-   r-   r.   r4   z  s   

zDbrxModel.forwardr5   r1   )r6   r7   r8   r   r   r   r<   r#   r:   r=   r   r4   r>   r-   r-   r+   r.   r   ]  s,    !r   c                	       sx   e Zd Z		ddedee def fddZe	 dej
d	ej
d
edej
fddZdeeeej
f  fddZ  ZS )DbrxForCausalLMNr   r   r!   r   c                    sb   t    || _|| _|j| _t||td|d| _t	|j|j
|jttd|d| _t|| _d S )Ntransformerr   lm_head)org_num_embeddingspadding_sizer   )r"   r#   r   r!   r   unpadded_vocab_sizer   r   r   r   r(   r   r   r   logits_processor)r*   r   r!   r   r+   r-   r.   r#     s   
zDbrxForCausalLM.__init__r   	positionsr   r0   c                 C   s    |  |||}| ||| j|S r1   )r   r   r   )r*   r   r   r   r/   r-   r-   r.   r4     s   
zDbrxForCausalLM.forwardweightsc           
      C   s   dd dD }t | jdd}|D ]=\}}|D ]\}}||vr q|||}|| }|j}	|	|||  nt||}|d u r?q|| }t|dt}	|	|| qd S )Nc                 S   s&   g | ]}|d v r
dndd| fqS ))rW   rY   rQ   rR   zexperts.mlp.r-   )r   rU   r-   r-   r.   r     s    z0DbrxForCausalLM.load_weights.<locals>.<listcomp>)rW   rY   rZ   F)remove_duplicaterE   )dictnamed_parametersreplacerE   r   getattrr   )
r*   r   expert_params_mappingparams_dictnamerT   
param_namerU   rS   rE   r-   r-   r.   load_weights  s*   

zDbrxForCausalLM.load_weightsr5   )r6   r7   r8   r   r   r   r<   r#   r:   no_gradr=   r   r4   r   r   r   r>   r-   r-   r+   r.   r     s*    $r   )4typingr   r   r   r:   torch.nnrN   sglang.srt.configsr   sglang.srt.distributedr   r   r   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   0sglang.srt.layers.moe.fused_moe_triton.fused_moer    sglang.srt.layers.moe.moe_runnerr   sglang.srt.layers.moe.topkr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   r   sglang.srt.utilsr   r   Moduler   r?   rk   r   r   r   r   
EntryClassr-   r-   r-   r.   <module>   s2   pT&!/@