o
    پiP.                     @   s:  d dl mZmZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ G dd dej%Z&G dd dej%Z'G dd dej%Z(G dd dej%Z)G dd dej%Z*e*Z+dS )    )AnyDictIterableOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sF   e Zd Z			ddedededee def
 fd	d
Zdd Z  Z	S )QWenMLPsiluN hidden_sizeintermediate_size
hidden_actquant_configprefixc              	      sl   t    t|d|g dd|td|d| _t||dd|td|d| _|dkr0td	| d
t | _	d S )N   Fgate_up_proj)biasgather_outputr   r   Tc_projr"   input_is_parallelr   r   r   zUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r!   r   r$   
ValueErrorr
   act_fn)selfr   r   r   r   r   	__class__ J/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/qwen.pyr(   .   s,   

zQWenMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r!   r*   r$   )r+   xgate_up_r.   r.   r/   forwardN   s   
zQWenMLP.forward)r   Nr   )
__name__
__module____qualname__intstrr   r   r(   r4   __classcell__r.   r.   r,   r/   r   -   s      r   c                       sz   e Zd Z					ddedededed	ed
eeeef  dee	 def fddZ
dejdejdedejfddZ  ZS )QWenAttentionr   '  Nr   r   	num_headsmax_position_embeddingslayer_id
rope_thetarope_scalingr   r   c	           
   
      s   t    || _t }	|| _| j|	 dksJ | j|	 | _|| j | _t|| j| jd|td|d| _	t
| j| j |dd|td|d| _t| j| j|||d| _| jd	 | _t| j| j| j| j||td
|d| _d S )Nr   Tc_attn)r"   r   r   Fr$   r%   )
rotary_dimmax_positionbaserA   g      attn)num_kv_headsr?   r   r   )r'   r(   r   r	   total_num_headsr=   head_dimr   r   rB   r   r$   r   
rotary_embscalingr   rF   )
r+   r   r=   r>   r?   r@   rA   r   r    tensor_model_parallel_world_sizer,   r.   r/   r(   V   sP   

zQWenAttention.__init__	positionshidden_statesforward_batchreturnc                 C   sV   |  |\}}|jddd\}}}| |||\}}| ||||}	| |	\}
}|
S )N   )chunksdim)rB   chunkrJ   rF   r$   )r+   rM   rN   rO   qkvr3   qkvattn_outputoutputr.   r.   r/   r4      s   zQWenAttention.forward)r   r<   NNr   )r5   r6   r7   r8   floatr   r   r9   r   r   r(   torchTensorr   r4   r:   r.   r.   r,   r/   r;   U   s@    	6r;   c                       T   e Zd Z		ddedee def fddZdej	d	ej	d
e
dej	fddZ  ZS )	QWenBlockNr   configr   r   c                    s   t    t|j|jd| _t|dd}t|dd }t|j|j|j	||||t
d|d| _t|j|jd| _t|j|jd |t
d|d	| _d S )
Nepsr@   r<   rA   rF   )r@   rA   r?   r   r   r    mlpr   r   )r'   r(   r   r   layer_norm_epsilonln_1getattrr;   num_attention_headsr>   r   rF   ln_2r   r   rd   )r+   ra   r?   r   r   r@   rA   r,   r.   r/   r(      s*   
zQWenBlock.__init__rM   rN   rO   rP   c                 C   sJ   |}|  |}| j|||d}|| }|}| |}| |}|| }|S )N)rM   rN   rO   )rg   rF   rj   rd   )r+   rM   rN   rO   residualr.   r.   r/   r4      s   


zQWenBlock.forwardNr   r5   r6   r7   r   r   r   r9   r(   r]   r^   r   r4   r:   r.   r.   r,   r/   r`      s&     r`   c                       r_   )	QWenModelNr   ra   r   r   c                    s|   t     | _ j| _ jd d d }t| jtdd| _t	 fddt
 jD | _t j jd| _d S )N?   @   wter   c              	      s(   g | ]}t  |td | dqS )zh.re   )r`   r   ).0ira   r   r   r.   r/   
<listcomp>   s    z&QWenModel.__init__.<locals>.<listcomp>rb   )r'   r(   ra   
vocab_sizer   r   r   rq   r   
ModuleListrangenum_hidden_layershr   rf   ln_fr+   ra   r   r   rw   r,   ru   r/   r(      s   
zQWenModel.__init__	input_idsrM   rO   rP   c                 C   sB   |  |}tt| jD ]}| j| }||||}q| |}|S r0   )rq   ry   lenr{   r|   )r+   r~   rM   rO   rN   rt   layerr.   r.   r/   r4      s   


zQWenModel.forwardrl   rm   r.   r.   r,   r/   rn      s&    rn   c                       s   e Zd Z		ddedee def fddZe	 dej
d	ej
d
efddZe	 dej
d	ej
d
edeeef fddZdeeeej
f  fddZ  ZS )QWenLMHeadModelNr   ra   r   r   c                    s^   t    || _t||td|d| _|jd d d }t||jtd|d| _	t
|| _d S )Ntransformerre   ro   rp   lm_headrr   )r'   r(   ra   rn   r   r   rw   r   r   r   r   logits_processorr}   r,   r.   r/   r(     s   
zQWenLMHeadModel.__init__r~   rM   rO   c                 C   s    |  |||}| ||| j|S r0   )r   r   r   )r+   r~   rM   rO   rN   r.   r.   r/   r4     s   
zQWenLMHeadModel.forwardsplit_intervalc           
      C   s   |\}}|dkr| j ||_t||D ]}| j j| }|||j||_q|| j jjkr@| j |j|_| ||j| j	|}	|	S d }	|	S )Nr   )
r   rq   rN   ry   r{   ra   rz   r|   r   r   )
r+   r~   rM   rO   r   startendrt   r   resultr.   r.   r/   forward_split_prefill!  s(   z%QWenLMHeadModel.forward_split_prefillweightsc                 C   s   ddg}t |  }|D ]M\}}d|v rq|D ](\}}}||vr!q|||}|dr1||vr1q|| }	|	j}
|
|	||  n|drJ||vrJq|| }	t|	dt}
|
|	| qd S )N)r!   w2r   )r!   w1   zrotary_emb.inv_freqz.biasweight_loader)dictnamed_parametersreplaceendswithr   rh   r   )r+   r   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   r.   r.   r/   load_weightsE  s0   
zQWenLMHeadModel.load_weightsrl   )r5   r6   r7   r   r   r   r9   r(   r]   no_gradr^   r   r4   r   r8   r   r   r   r:   r.   r.   r,   r/   r     s:    
$#r   ),typingr   r   r   r   r   r]   r   transformersr   sglang.srt.distributedr	   sglang.srt.layers.activationr
   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r;   r`   rn   r   
EntryClassr.   r.   r.   r/   <module>   s,   (E90`