o
    پi8                     @   s:  d Z ddlZddlmZmZmZmZmZ ddlZddlm	Z	 ddl
mZ ddlmZ ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ G dd de	j%Z&G dd de	j%Z'G dd de	j%Z(G dd de	j%Z)G dd de	j%Z*e*Z+dS )zAInference-only MiniCPM model compatible with HuggingFace weights.    N)AnyDictIterableOptionalTuple)nn)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sH   e Zd Z		ddedededee deddf fd	d
Zdd Z  Z	S )
MiniCPMMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	N   Fgate_up_projbiasr   r   	down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r    r   r#   
ValueErrorr	   act_fn)selfr   r   r   r   r   	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/minicpm.pyr&   ,   s(   

zMiniCPMMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r    r(   r#   )r)   xgate_up_r,   r,   r-   forwardJ   s   
zMiniCPMMLP.forwardNr   )
__name__
__module____qualname__intstrr   r   r&   r2   __classcell__r,   r,   r*   r-   r   +   s"    r   c                       s   e Zd Z						ddededed	ed
edeeeef  dedee	 deddf fddZ
dejdejdedejfddZ  ZS )MiniCPMAttentionr   '  N    r   r   	num_headsnum_kv_headslayer_id
rope_thetarope_scalingmax_position_embeddingsr   r   r   c
              
      sJ  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _|| j | _	| j| j	 | _
| j| j	 | _| j	d | _|| _|| _t|| j	| j| jd|td|	d| _t| j| j	 |d|td|	d| _t| j	| j	|||d| _t| j| j	| j| j||td	|	d
| _d S )Nr      g      Fqkv_projr!   o_proj)
rotary_dimmax_positionbaserA   attn)r>   r?   r   r   )r%   r&   r   r   total_num_headsr=   total_num_kv_headsmaxr>   head_dimq_sizekv_sizescalingr@   rB   r   r   rD   r   rE   r   
rotary_embr   rI   )r)   r   r=   r>   r?   r@   rA   rB   r   r   tp_sizer*   r,   r-   r&   R   sb   

	
zMiniCPMAttention.__init__	positionshidden_statesforward_batchc                 C   s   |  |\}}|j| j| j| jgdd\}}}|j}	| | }}| |||\}}||	||	}}| ||||}
| 	|
\}}|S )N)dim)
rD   splitrN   rO   dtypefloatrQ   torI   rE   )r)   rS   rT   rU   qkvr1   qkv
orig_dtypeattn_outputoutputr,   r,   r-   r2      s    zMiniCPMAttention.forward)r   r;   Nr<   Nr   )r4   r5   r6   r7   rZ   r   r   r8   r   r   r&   torchTensorr   r2   r9   r,   r,   r*   r-   r:   Q   sJ    	
Er:   c                       sn   e Zd Z			ddedee deddf fdd	Zd
ej	dej	de
deej	 deej	ej	f f
ddZ  ZS )MiniCPMDecoderLayerr   Nr   r?   r   r   r   c                    s   t    || _|j| _t|dd}t|dd }t|dd}t| j|j|j|||||td|d	| _	t
| j|j|j|td|d	| _t|j|jd
| _t|j|jd
| _d S )Nr@   r;   rA   rB   r<   	self_attn)	r   r=   r>   r?   r@   rA   rB   r   r   mlp)r   r   r   r   r   eps)r%   r&   configr   getattrr:   num_attention_headsnum_key_value_headsr   rf   r   r   r   rg   r
   rms_norm_epsinput_layernormpost_attention_layernorm)r)   rj   r?   r   r   r@   rA   rB   r*   r,   r-   r&      s8   
zMiniCPMDecoderLayer.__init__rS   rT   rU   residualc                 C   sz   |}|  |}| j|||d}||| jjt| jj   }|}| |}| |}||| jjt| jj   }|d fS )N)rS   rT   rU   )	ro   rf   rj   scale_depthmathsqrtnum_hidden_layersrp   rg   )r)   rS   rT   rU   rq   r,   r,   r-   r2      s"   


zMiniCPMDecoderLayer.forward)r   Nr   )r4   r5   r6   r7   r   r   r8   r&   rc   rd   r   r   r2   r9   r,   r,   r*   r-   re      s0    $re   c                       s^   e Zd Z		ddee deddf fddZ	ddejd	ejd
e	dejdejf
ddZ
  ZS )MiniCPMModelNr   r   r   r   c                    sx   t     | _ j| _ j| _t| j j jtdd| _	t
 fddt jD | _t j jd| _d S )Nembed_tokensorg_num_embeddingsr   c              	      s(   g | ]}t  |td | dqS )zlayers.r   r   )re   r   ).0irj   r   r   r,   r-   
<listcomp>   s    z)MiniCPMModel.__init__.<locals>.<listcomp>rh   )r%   r&   rj   pad_token_idpadding_idx
vocab_sizer   r   r   rw   r   
ModuleListrangeru   layersr
   rn   normr)   rj   r   r   r*   r}   r-   r&      s    
zMiniCPMModel.__init__	input_idsrS   rU   input_embedsc           	      C   sb   |d u r|  || jj }n|}d }tt| jD ]}| j| }|||||\}}q| |}|S r.   )rw   rj   	scale_embr   lenr   r   )	r)   r   rS   rU   r   rT   rq   r|   layerr,   r,   r-   r2   	  s   


zMiniCPMModel.forwardr3   r.   )r4   r5   r6   r   r   r8   r&   rc   rd   r   r2   r9   r,   r,   r*   r-   rv      s,    "rv   c                       s   e Zd Z		ddee deddf fddZe 	ddej	d	ej	d
e
dej	dej	f
ddZdeeeej	f  fddZ  ZS )MiniCPMForCausalLMNr   r   r   r   c                    s   t    || _t| jdd| _|| _t||td|d| _| jj	s1t
|j|j|jtd|d| _| jj| jj | _t|| _d S )Nnum_expertsr   modelrz   lm_headrx   )r%   r&   rj   rk   r   r   rv   r   r   tie_word_embeddingsr   r   r   r   dim_model_basescale_widthr   logits_processorr   r*   r,   r-   r&   #  s    
zMiniCPMForCausalLM.__init__r   rS   rU   r   c                 C   sV   |d ur
|| j j }| ||||}|| j }| j jr | jj}n| j}| ||||S r.   )rj   r   r   r   r   rw   r   r   )r)   r   rS   rU   r   rT   r   r,   r,   r-   r2   >  s   

zMiniCPMForCausalLM.forwardweightsc                 C   s6  g d}dd t | jD }t|  }|D ]\}}d|v rqd|v s'd|v r(q| jjr1d|v r1q|D ](\}}}	||vr=q3|||}|drM||vrMq3|| }
|
j}||
||	  n<|D ] \}}}||vrhq^|||}|| }
|
j}||
|||d	  n|dr||vrq|| }
t	|
d
t
}||
| qd S )N))rD   q_projr]   )rD   k_projr^   )rD   v_projr_   )r    	gate_projr   )r    up_projrC   c              	   S   s:   g | ]}d D ]}|dv rdndd| d| d|fqqS ))w1w2w3)r   r   wsw2szexperts..z.weightr,   )r{   	expert_idweight_namer,   r,   r-   r~   Y  s    z3MiniCPMForCausalLM.load_weights.<locals>.<listcomp>zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedzlm_head.weightz.bias)r   weight_loader)r   r   dictnamed_parametersrj   r   replaceendswithr   rk   r   )r)   r   stacked_params_mappingexpert_params_mappingparams_dictnameloaded_weight
param_namer   shard_idparamr   r   r,   r,   r-   load_weightsP  sR   

zMiniCPMForCausalLM.load_weightsr3   r.   )r4   r5   r6   r   r   r8   r&   rc   no_gradrd   r   r2   r   r   r   r9   r,   r,   r*   r-   r   "  s0    $r   ),__doc__rs   typingr   r   r   r   r   rc   r   sglang.srt.distributedr   sglang.srt.layers.activationr	   sglang.srt.layers.layernormr
   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r:   re   rv   r   
EntryClassr,   r,   r,   r-   <module>   s.   &WC7m