o
    
۾i!                     @   s   d Z ddlZddlmZ ddlmZ ddlmZmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ G dd dejZG dd deZG dd deZG dd deZ dS )zBInference-only MiniCPM3 model compatible with HuggingFace weights.    N)nn)PretrainedConfig)CacheConfig
VllmConfig)$get_tensor_model_parallel_world_size)	Attention)RMSNorm)ColumnParallelLinearReplicatedLinearRowParallelLinear)QuantizationConfig)get_rope)MiniCPMDecoderLayerMiniCPMForCausalLMMiniCPMModel   )make_layersc                       s   e Zd Z				ddededededed	ed
ededededB dedB deddf fddZde	j
de	j
de	j
fddZ  ZS )MiniCPM3Attention    N confighidden_size	num_headsqk_nope_head_dimqk_rope_head_dim
v_head_dimq_lora_rankkv_lora_rankmax_position_embeddingscache_configquant_configprefixreturnc              	      s~  t    || _|| _|| _|| | _|| _|| _|| _|| _	t
 }| j	| dks+J || | _| jd | _|	| _t| j| jd|d| _t| j|jd| _t|| j	| j d|| dd| _t| j| j| j d|| dd| _t| j|jd| _t| j| j	| j| j  d|| d	d| _t| j	| j | jd|| d
d| _t| j|	|jd| _t| j| j| j| j|
|| dd| _d S )Nr   g      F)biasr    epsz	.q_b_proj)r#   r    r!   z.kv_a_proj_with_mqaz
.kv_b_projz.o_proj)max_positionrope_parametersz.attn)num_kv_headsr   r    r!   )super__init__r   r   r   qk_head_dimr   r   r   r   r   num_local_headsscalingr   r
   q_a_projr   rms_norm_epsq_a_layernormr	   q_b_projkv_a_proj_with_mqakv_a_layernorm	kv_b_projr   o_projr   r'   
rotary_embr   attn)selfr   r   r   r   r   r   r   r   r   r   r    r!   tp_size	__class__ W/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/minicpm3.pyr*   5   sz   





zMiniCPM3Attention.__init__	positionshidden_statesc                 C   s$  |  |\}}| |}| |\}}|d| j| j}|j| j| jgdd\}}| 	|\}}|j| j
| jgdd\}}|d}| | }| |\}}|d| j| j| j }|j| j| jgdd\}	}
|d d d d | j
d f }| ||d| j| j |d| j\}}|d| j| j}|dd| j}||d| jd f< t|}|	|dd | jf< ||d| jd f< |d| j| j }|d| j| j }tjjj|
d| j| j gddd| j| j }
| |||
}|d| j| jdd | jf d| j| j }| |\}}|S )N)dimr   .r   )value)r.   r0   r1   viewr,   r+   splitr   r   r2   r   	unsqueezer3   
contiguousr4   r   r6   reshapetorch
empty_liker   
functionalpadr7   r5   )r8   r>   r?   q_q_pelatent_cachekv_akvk_nopevk_pekattn_outputoutputr<   r<   r=   forward   sN   


zMiniCPM3Attention.forward)r   NNr   )__name__
__module____qualname__r   intr   r   strr*   rH   TensorrX   __classcell__r<   r<   r:   r=   r   4   sN    	
Sr   c                   @   s   e Zd Zdd ZdS )MiniCPM3DecoderLayerc                 C   sf   t | jj| jjd| _t| j| j| jj| jj| jj| jj	| jj
| jj| j| j| j| j dd| _d S )Nr$   z
.self_attn)r   r   r   r   r   r   r   r   r   r   r    r!   )r   r   r   r/   input_layernormr   num_attention_headsr   r   r   r   r   r   r   r    r!   	self_attn)r8   r<   r<   r=   _init_attn_block   s"   
z%MiniCPM3DecoderLayer._init_attn_blockN)rY   rZ   r[   rd   r<   r<   r<   r=   r`      s    r`   c                	   @   s.   e Zd ZdedededB dedB fddZdS )MiniCPM3Modelr!   r   r   Nr    c                    s2   t j fdd| dd\| _| _| _d S )Nc                    s   t  | dS )Nr!   )r`   rf   r   r   r    r<   r=   <lambda>   s    z,MiniCPM3Model._init_layers.<locals>.<lambda>z.layersrf   )r   num_hidden_layersstart_layer	end_layerlayers)r8   r!   r   r   r    r<   rg   r=   _init_layers   s
   zMiniCPM3Model._init_layers)rY   rZ   r[   r]   r   r   r   rm   r<   r<   r<   r=   re      s    re   c                   @   s0   e Zd ZdddgiZdddedefdd	Zd
S )MiniCPM3ForCausalLMgate_up_proj	gate_projup_projr   rf   vllm_configr!   c                C   s   t ||dS )N)rr   r!   )re   )r8   rr   r!   r<   r<   r=   _init_model   s   zMiniCPM3ForCausalLM._init_modelN)rY   rZ   r[   packed_modules_mappingr   r]   rs   r<   r<   r<   r=   rn      s    rn   )!__doc__rH   r   transformersr   vllm.configr   r   vllm.distributedr   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr	   r
   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   "vllm.model_executor.models.minicpmr   r   r   utilsr   Moduler   r`   re   rn   r<   r<   r<   r=   <module>   s$    