o
    پi^3                     @   s:  d dl mZmZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$ G dd dej%Z&G dd dej%Z'G dd dej%Z(G dd dej%Z)G dd dej%Z*e*Z+dS )    )AnyDictIterableOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sH   e Zd Z		ddedededee deddf fd	d
Zdd Z  Z	S )InternLM2MLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	N   Fgate_up_projbiasr   r   w2siluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r!   r   r$   
ValueErrorr
   act_fn)selfr   r   r   r   r   	__class__ O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/internlm2.pyr'   -   s(   

zInternLM2MLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r!   r)   r$   )r*   xgate_up_r-   r-   r.   forwardK   s   
zInternLM2MLP.forwardNr   )
__name__
__module____qualname__intstrr   r   r'   r3   __classcell__r-   r-   r+   r.   r   ,   s"    r   c                       s   e Zd Z						ddededed	ed
eeeef  dededee	 deddf fddZ
dejdejdedejfddZ  ZS )InternLM2Attention'  N    r   r   r   	num_headsnum_kv_heads
rope_thetarope_scalingmax_position_embeddingslayer_idr   r   r   c
              
      sJ  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _|| j | _	| j| j	 | _
| j| j	 | _| j	d | _|| _|| _t|| j	| j| jd|td|	d| _t| j| j	 |d|td|	d| _t| j	| j	|||d| _t| j| j	| j| j||td	|	d
| _d S )Nr      g      Fwqkvr"   wo)
rotary_dimmax_positionbaserA   attn)r   r   )r&   r'   r   r	   total_num_headsr>   total_num_kv_headsmaxr?   head_dimq_sizekv_sizescalingr@   rB   r   r   rE   r   rF   r   
rotary_embr   rJ   )r*   r   r>   r?   r@   rA   rB   rC   r   r   tp_sizer+   r-   r.   r'   S   sb   

	
zInternLM2Attention.__init__	positionshidden_statesforward_batchc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )Ndim)rE   splitrO   rP   rR   rJ   rF   )r*   rT   rU   rV   qkvr2   qkvattn_outputoutputr-   r-   r.   r3      s    zInternLM2Attention.forward)r<   Nr=   r   Nr   )r5   r6   r7   r8   floatr   r   r9   r   r   r'   torchTensorr   r3   r:   r-   r-   r+   r.   r;   R   sJ    	
Er;   c                       sr   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )InternLMDecoderLayerr   Nr   configrC   r   r   r   c                    s   t    |j| _t|dd}t|dd }t|dd}t| j|j|j|||||td|d	| _t	| j|j
|j|td|d	| _t|j|jd
| _t|j|jd
| _d S )Nr@   r<   rA   rB   r=   	attention)	r   r>   r?   r@   rA   rB   rC   r   r   feed_forward)r   r   r   r   r   eps)r&   r'   r   getattrr;   num_attention_headsnum_key_value_headsr   rf   r   r   r   rg   r   rms_norm_epsattention_normffn_norm)r*   re   rC   r   r   r@   rA   rB   r+   r-   r.   r'      s2   
zInternLMDecoderLayer.__init__rT   rU   rV   residualc                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rT   rU   rV   )rn   rf   ro   rg   )r*   rT   rU   rV   rp   r-   r-   r.   r3      s   
zInternLMDecoderLayer.forward)r   Nr   )r5   r6   r7   r   r8   r   r   r9   r'   rb   rc   r   r   r3   r:   r-   r-   r+   r.   rd      s4    !rd   c                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )InternLM2ModelNr   re   r   r   r   c                    st   t     | _ j| _ j| _t j jtdd| _	t
 fddt jD | _t j jd| _d S )Ntok_embeddingsr   c              	      s(   g | ]}t  |td | dqS )zlayers.rs   )rd   r   ).0ire   r   r   r-   r.   
<listcomp>   s    z+InternLM2Model.__init__.<locals>.<listcomp>rh   )r&   r'   re   pad_token_idpadding_idx
vocab_sizer   r   r   rr   r   
ModuleListrangenum_hidden_layerslayersr   rm   normr*   re   r   r   r+   rv   r.   r'      s   
zInternLM2Model.__init__	input_idsrT   rV   input_embedsc           
      C   s`   |d u r
|  |}n|}d }tt| jD ]}| j| }|||||\}}q| ||\}}	|S r/   )rr   r|   lenr~   r   )
r*   r   rT   rV   r   rU   rp   ru   layerr2   r-   r-   r.   r3      s   

zInternLM2Model.forwardr4   r/   )r5   r6   r7   r   r   r   r9   r'   rb   rc   r   r3   r:   r-   r-   r+   r.   rq      s0    rq   c                       s   e Zd Z		ddedee deddf fddZdej	fd	d
Z
e 	ddejdejdedejdejf
ddZdeeeejf  fddZ  ZS )InternLM2ForCausalLMNr   re   r   r   r   c                    sT   t    || _|| _t||td|d| _t|j|j	td|d| _
t|| _d S )Nmodelrs   r`   )r&   r'   re   r   rq   r   r   r   rz   r   r`   r   logits_processorr   r+   r-   r.   r'     s   
zInternLM2ForCausalLM.__init__c                 C   s   | j jS r/   )r   rr   )r*   r-   r-   r.   get_input_embeddings%  s   z)InternLM2ForCausalLM.get_input_embeddingsr   rT   rV   r   c                 C   s"   |  ||||}| ||| j|S r/   )r   r   r`   )r*   r   rT   rV   r   rU   r-   r-   r.   r3   (  s   
zInternLM2ForCausalLM.forwardweightsc                 C   sv  ddg}t |  }|D ]\}}d|v rq|D ](\}}}||vr!q|||}|dr1||vr1q|| }	|	j}
|
|	||  nx|drJ||vrJq|| }	d|v r| j}|j|j }|j|j }|	dd| ||j
d }tj||ddgdd	\}}}|d|j
d }|d|j
d }|d|j
d }|	j}
|
|	|d
 |
|	|d |
|	|d qt|	dt}
|
|	| qd S )N)r!   w1r   )r!   w3rD   zrotary_emb.inv_freqz.biasrE   rW   r    rD   rX   r\   r]   r^   weight_loader)dictnamed_parametersreplaceendswithr   re   rk   rl   r   viewshaperb   rZ   reshaperj   r   )r*   r   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   re   	kv_groupsrN   wqwkwvr-   r-   r.   load_weights5  sR   
z!InternLM2ForCausalLM.load_weightsr4   r/   )r5   r6   r7   r   r   r   r9   r'   r   	Embeddingr   rb   no_gradrc   r   r3   r   r   r   r:   r-   r-   r+   r.   r     s6    $r   ),typingr   r   r   r   r   rb   r   transformersr   sglang.srt.distributedr	   sglang.srt.layers.activationr
   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r;   rd   rq   r   
EntryClassr-   r-   r-   r.   <module>   s,   &T;2R