o
    i5                     @   s  d dl Z d dlmZmZ d dl mZ d dlZd dlmZ d dlmZ d dl	m
Z
 d dlmZmZmZ d dlmZmZmZmZmZmZ d d	lmZ d d
lmZmZ d dlmZ d dlmZ d dlm Z m!Z!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+ d dl,m-Z- d dl.m/Z/m0Z0 d dl1m2Z2m3Z3 d dl4m5Z5m6Z6m7Z7 d dl8m9Z9m:Z:m;Z;m<Z<m=Z=m>Z>m?Z?m@Z@ d dlAmBZB d dlCmDZD d dlEmFZF d dlGmHZH d dlImJZJ d dlKmLZL deMfddZNG dd  d ejOZPG d!d" d"ejOZQG d#d$ d$ejOZRG d%d& d&ejOZSG d'd( d(ejOZTG d)d* d*ejOZUe
G d+d, d,ejOZVG d-d. d.ejOe7e6ZWG d/d0 d0eWe5ZXG d1d2 d2eWZYG d3d4 d4eYZZG d5d6 d6eXZ[G d7d8 d8eXZ\dS )9    N)CallableIterable)Any)nn)PretrainedConfig)support_torch_compile)CacheConfigParallelConfig
VllmConfig)get_ep_groupget_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_sizeget_tp_group tensor_model_parallel_all_gather)
SiluAndMul)	AttentionStaticSinkAttention)SharedFusedMoE)RMSNorm)ColumnParallelLinearMergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)
MLAModulesMultiHeadLatentAttentionWrapper)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)MixtureOfExpertsSupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixsequence_parallel_chunk)set_weight_attrs)current_platform)IntermediateTensors)set_default_rope_theta)AttentionType)FlashAttentionDiffKVBackendact_fnc                 C   s   | dkrt d|  dd S )NsiluzUnsupported activation: z!. Only silu is supported for now.)
ValueError)r5    r8   Z/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/openpangu.pycheck_ffn_act_fn[   s
   
r:   c                       sd   e Zd Z					ddededededB d	ed
ededdf fddZdej	dej	fddZ
  ZS )OpenPanguMLPNFT hidden_sizeintermediate_size
hidden_actquant_configbiasreduce_resultsprefixreturnc	           	   	      s^   t    t||gd |||| dd| _t||||||| dd| _t| t | _d S )N   .gate_up_proj)rA   r@   
disable_tprC   z
.down_proj)rA   r@   rB   rG   rC   )	super__init__r   gate_up_projr   	down_projr:   r   r5   )	selfr=   r>   r?   r@   rA   rB   is_sequence_parallelrC   	__class__r8   r9   rI   c   s(   

zOpenPanguMLP.__init__xc                 C   s   |  | | |d d S )Nr   )rK   r5   rJ   )rL   rP   r8   r8   r9   forward   s   zOpenPanguMLP.forward)NFTFr<   )__name__
__module____qualname__intstrr   boolrI   torchTensorrQ   __classcell__r8   r8   rN   r9   r;   b   s0    	
!r;   c                	       sN   e Zd Z		ddedededB def fddZd	ej	d
ej	fddZ
  ZS )OpenPanguMoENr<   configparallel_configr@   rC   c              	      s  t    t | _t j| _|j| _t j	| _
| j
 | _| j
 | _|j| _|j| _|j| _t|j t|j|jdd | dd| _t|drZ|jrZttj| jtjd| j_nd | j_|j }|j!| _!|j"| _#| j| _$| j$| j# | _%| j%| j | _&| j| j& | _'| j'| j& | _(|jd ur|j)|j }t*|j||j|| jd| dd| _+nd | _+t,di d| j+d	|jd
|j-d|jd|j)ddd|j.d|ddddddd| dddddd| jjd| j!d| j#d| j| _/d S ) NFz.gaterA   r@   rC   router_enable_expert_biasdtypez.shared_experts)r=   r>   r?   r@   rM   rB   rC   shared_expertsnum_expertstop_kr=   r>   rB   renormalizer@   use_grouped_topkTnum_expert_group   
topk_grouprC   z.expertsscoring_funcsigmoidrouted_scaling_factor      ?e_score_correction_biasenable_eplbnum_redundant_expertsrM   r8   )0rH   rI   r   tp_sizer   rank_in_grouptp_rankrl   r   device_groupep_grouprankep_ranksizeep_sizen_routed_expertsn_shared_expertsuse_sequence_parallel_moerM   r:   r?   r   r=   gatehasattrr_   r   	ParameterrX   emptyfloat32rn   eplb_configro   rp   n_redundant_expertsn_logical_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endmoe_intermediate_sizer;   rb   r   num_experts_per_toknorm_topk_probexperts)rL   r\   r]   r@   rC   r   r>   rN   r8   r9   rI      s   








	


zOpenPanguMoE.__init__hidden_statesrD   c           	      C   s   |j \}}|d|}| jrt|}| |\}}| j||d}|\}}| jd u r/|d u s/J |jtj	kr;|| j
9 }n| jd urM|d usFJ |d| j
 9 }| jd ur\|d usXJ ||7 }| jrkt|d}|d | }n| jdkrv| j|}|||S )N)r   router_logitsrm   r   rh   )shapeviewrM   r.   r}   r   rb   ra   rX   float16rl   r   rq   &maybe_all_reduce_tensor_model_parallel)	rL   r   
num_tokens
hidden_dimr   _fused_moe_outshared_outputfinal_hidden_statesr8   r8   r9   rQ      s:   




zOpenPanguMoE.forward)Nr<   )rR   rS   rT   r   r	   r   rV   rI   rX   rY   rQ   rZ   r8   r8   rN   r9   r[      s"    Xr[   c                       s   e Zd Z				ddededededed	ed
edB dedededB dedB deddf fddZde	j
de	j
de	j
fddZ  ZS )OpenPanguMLAAttention    Nr<   r\   r=   	num_headsqk_nope_head_dimqk_rope_head_dim
v_head_dimq_lora_rankkv_lora_rankmax_position_embeddingscache_configr@   rC   rD   c                    s  t    || _|| _|| _|| _|| | _|| _|| _|| _	t
 | _|| j dkr6td| d| j d|| j | _| jd | _|	| _|| _| jd ur~t| j| j| j	| j gd|| ddd	| _t| j|jd
| _t|| j| j d|| dd| _n$t| j| j| j d|| dd| _t| j| j	| j d|| dd| _t| j	|jd
| _t| j	| j| j| j  d|| dd| _t| j| j | jd|| dd| _t|dd |j d ddddd|	ddd	}t!||	|dd| _"t#| j| j| j"| j| jd ur| jnd | jd u r| jnd | jd ur| jnd | jd ur | jnd | jd u r*| jnd d dd d}t$| j| j| j| j| j| j| j| j	||
||| _%d S )Nr   z
num_heads  is not divisible by tp_size .      F.fused_qkv_a_projT)rA   r@   rC   rG   epsz	.q_b_projr^   .q_proj.kv_a_proj_with_mqaz
.kv_b_proj.o_proji'  )default_theta
rope_theta    rh   rm   yarndeepseek_yarn)	r   	beta_fast	beta_slowfactormscalemscale_all_dim original_max_position_embeddingstype	rope_typemax_positionrope_parametersis_neox_style)kv_a_layernorm	kv_b_proj
rotary_embo_projfused_qkv_a_projkv_a_proj_with_mqaq_a_layernormq_b_projq_projindexer	is_sparsetopk_indices_buffer)&rH   rI   r=   r   r   r   qk_head_dimr   r   r   r   rq   r7   num_local_headsscalingr   rC   r   r   r   rms_norm_epsr   r   r   r   r   r   r   r   r   r   r2   r   r   r   r   r   mla_attn)rL   r\   r=   r   r   r   r   r   r   r   r   r@   rC   r   mla_modulesrN   r8   r9   rI     s   







	
zOpenPanguMLAAttention.__init__	positionsr   c                 C   s   |  ||S N)r   )rL   r   r   r8   r8   r9   rQ     s   zOpenPanguMLAAttention.forward)r   NNr<   )rR   rS   rT   r   rU   r   r   rV   rI   rX   rY   rQ   rZ   r8   r8   rN   r9   r     sP    	
 r   c                       s   e Zd Zddddddejfdedededed	ed
edB dedede	dB de
de
ddf fddZdejdejdejfddZded
edB ddfddZ  ZS )OpenPanguEmbeddedAttentionr   NFr<   r\   r=   r   num_kv_headsr   r@   rA   bias_o_projr   rC   	attn_typerD   c                    s  t    t|
}|| _t }|| _| j| dkr%td| j d| d| j| | _|| _| j|krF| j| dkrFtd| j d| d| j|k r^|| j dkr^td| d| j dt	d| j| | _
t|d	d }|d u rw| j| j }|| _| j| j | _| j
| j | _| jd
 | _|| _t|| j| j| j|||
 dd| _t| j| j ||||
 dd| _| j||d t|dr|j}t|tr|}nt|tr|t| }|| }ntt| dd }t| j| j| j| j
|	||||
 dd	| _d S )Nr   total_num_heads r   r   CNumber of KV heads is greater than TP size, but total_num_kv_heads z5Number of KV heads is less than TP size, but tp_size z( is not divisible by total_num_kv_heads rh   head_dimr   	.qkv_proj)r=   	head_sizetotal_num_headstotal_num_kv_headsrA   r@   rC   r   
input_sizeoutput_sizerA   r@   rC   )r@   interleaved_sliding_window1 for interleaved_sliding_window is not supported..attn)r   r   r@   per_layer_sliding_windowr   rC   )rH   rI   r)   r=   r   r   r7   r   r   maxr   getattrr   q_sizekv_sizer   r   r   qkv_projr   r   _init_rotary_embr~   r   
isinstancerU   listlenr   r   attn)rL   r\   r=   r   r   r   r@   rA   r   r   rC   r   	layer_idxrq   r   r   sliding_windowsw_idxrN   r8   r9   rI     s   







z#OpenPanguEmbeddedAttention.__init__r   r   c           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )Nr   dim)r   splitr   r   r   r   r   
rL   r   r   qkvr   qkvattn_outputoutputr8   r8   r9   rQ     s    z"OpenPanguEmbeddedAttention.forwardc                 C   sf   d}|o	|  dk}|r|jdkrd}|jpi }|d ur&|ddr&d|d< t| j| j||d| _d S )	NTggufPanguEmbeddedFmrope_interleaved	openpangur   r   )get_name
model_typer   getr   r   r   r   )rL   r\   r@   r   is_ggufr   r8   r8   r9   r     s   
z+OpenPanguEmbeddedAttention._init_rotary_emb)rR   rS   rT   r3   DECODERr   rU   r   rW   r   rV   rI   rX   rY   rQ   r   rZ   r8   r8   rN   r9   r     s^    	
d
r   c                       s   e Zd Zdddddddejfdedededed	eee	f dB d
ede
dB dedededB dededdf fddZdejdejfddZdejdejdejfddZded	eee	f dB de
dB ddfddZd ddZ  ZS )!OpenPanguSinkAttentionNr   Fr<   r\   r=   r   r   r   r   r@   rA   r   r   rC   r   rD   c                    sf  t    t|}|| _t | _t | _|| _| j| j dkr,t	d| j d| j d| j| j | _
|| _| j| jkrQ| j| j dkrQt	d| j d| j d| j| jk rdt	d| j d| j dtd	| j| j | _t|d
d | _t|dd | _t|dd | _| j| j | _| j
| j | _| j| j | _| j| j | _| jd | _|| _t|dd| _t|dd| _t|dd | _t|dd| _t|| j| j | j| j | j| j g||| dd| _t| j| j ||	|| dd| _t | j|j!d| _"| j#|||d t$|dr.|j%}t&|t'r|}nt&|t(r%|t)| }|| }nt	t*| dd }t+,| j t-| j
| j| j| j| j|
|||| dt+| jd| _.| jdkrt/j01t/j2| j| j| jft34 |j5d| _6t7| j6d	| j8d | jrt/j01t/j2| j| j| jft34 |j5d| _9t7| j9d	| j8d nt/j:| j| j| jft34 |j5d| _9| ;  d S )Nr   r   r   r   r   zNumber of KV heads z is less than TP size z*, KV heads replication is not support yet.rh   qk_nope_dimqk_rope_dim
v_channelsr   param_sink_numberparam_sink_with_valueFparam_sink_scalarparam_sink_of_head_dimr   )r   output_sizesrA   r@   rC   r   r   r   )r   r@   r   r   r   )	sink_lenr   r   r@   r   r   rC   attn_backendhead_size_v)devicera   )
output_dimweight_loader)<rH   rI   r)   r=   r   rq   r   rs   r   r7   r   r   r   r   r   r  r  r  r   r   k_sizev_sizer   r   r  r	  r
  param_sink_of_head_numr   r   r   r   r   r   k_layernormr   r~   r   r   rU   r   r   r   r4   set_head_size_vr   r   rX   r   r   r   r0   current_devicetorch_dtypeparam_sink_keyr/   r  param_sink_valuezerospost_weight_load)rL   r\   r=   r   r   r   r   r@   rA   r   r   rC   r   r   r   r   r   rN   r8   r9   rI   +  s
  








zOpenPanguSinkAttention.__init__paramloaded_weightc                 C   s  t |dd }t |dd}t |dd}|p|}t |dd}t |dd}|r)| |_|rVt|tjrVt|j}|d urN|| | j dksEJ || | j ||< |j	||j
d |j}	|d urp|sp|	j| }
| j|
 }||||
}t|jdkr||d	}|	j|jksJ |	| d S )
Nr  is_sharded_weightFuse_bitsandbytes_4bitis_gguf_weightis_gguf_weight_typer   r`   rh   )r   itemweight_typer   r   UninitializedParameterr   r   rq   materializera   datars   narrowr   reshapecopy_)rL   r  r  r  r   r!  r"  r#  final_shape
param_data
shard_size	start_idxr8   r8   r9   r    s.   




z$OpenPanguSinkAttention.weight_loaderr   r   c           
   	   C   s   |  |\}}|j| j| j| jgdd\}}}| |d| j| j}| 	|||\}}|d| j}|d| j}| j
|||t|jd |jd | j | j gd}| |\}	}|	S )Nr   r   r   rh   )output_shape)r   r   r   r  r  r  r   r   r   r   r   rX   Sizer   r  r   r   r8   r8   r9   rQ     s     zOpenPanguSinkAttention.forwardc                 C   s.   d}d| j | j i}t| j| j||d| _d S )NFpartial_rotary_factorr   )r  r   r   r   r   )rL   r\   r   r@   r   r8   r8   r9   r     s   z'OpenPanguSinkAttention._init_rotary_embc                 C   s<   t | dr| jd ur| | j}n| j}| j|| j d S )Nr  )r~   r  r  r   update_sink_kvr  )rL   r  r8   r8   r9   r  #  s   z'OpenPanguSinkAttention.post_weight_loadrD   N)rR   rS   rT   r3   r  r   rU   dictrV   r   r   rW   r   rI   r   r   rX   rY   r  rQ   r   r  rZ   r8   r8   rN   r9   r  *  sn    	
 ,%

r  c                       sT   e Zd Zdedededdf fddZdejd	ejd
ejdB dejfddZ	  Z
S )OpenPanguDecoderLayerr\   rC   vllm_configrD   Nc                    s  t    |d u r|jj}|j}|j}|j}|j| _t|dd}t	|j
ddd }|| _t|do@t|do@t|do@t|d	| _t|d
oK|jdk| _| jrst|| j|j|j|j|jt|drd|jnd |j|||| dd| _n| jrt|ddpt|dd}	|	}
t|dr|j}	t|ddrtj}n	td|j dt|dd }|d u rd|jd}t|| j|jt|d|j||||	|
|| d|d| _n=t|ddpt|dd}	|	}
t|dr|j}	t|ddrtj}ntj}t || j|jt|d|j|||	|
|| d|d| _t|dd d ur$||j!kr$t"|||| dd| _#nt$| j|j%|j&|t|d d| dd!| _#t|d"d#| _'|j(| _(t|d$| j(| _!t)|j|j*d%| _+t)|j|j*d%| _,t- j.| _/t|d&d| _0| j0rt)|j|j*d%| _1t)|j|j*d%| _2d S d S )'Nr   r   r   )sepr   r   r   r   r   r  r   r   z
.self_attn)r\   r=   r   r   r   r   r   r   r   r   r@   rC   attention_biasFrA   qkv_bias	is_causalTz
is_causal=z' is not support for attention with sinkrope_scalingdefault)r   r   num_key_value_heads)r\   r=   r   r   r   r   r@   rA   r   r   rC   r   )r\   r=   r   r   r   r@   rA   r   r   rC   r   rz   z.mlp)r\   r]   r@   rC   mlp_bias)r=   r>   r?   r@   rA   rC   rl   rm   first_k_dense_replacer   sandwich_norm)3rH   rI   model_config	hf_configr   r@   r]   r=   r   rU   r   r   r~   use_mlar  use_sink_attentionr   num_attention_headsr   r   r   r   r   	self_attnr:  r3   r  r7   r;  r   r  ENCODER_ONLYr   r@  r[   mlpr;   r>   r?   rl   num_hidden_layersr   r   input_layernormpost_attention_layernormr   rt   tp_grouprA  pre_mlp_layernormpost_mlp_layernorm)rL   r\   rC   r7  r   r@   r]   r   r   r9  r   r   r   rN   r8   r9   rI   -  s   








zOpenPanguDecoderLayer.__init__r   r   residualc                 C   s   |d u r|  }| |}n| ||\}}| j||d}| jd ur;|jtjkr;|d| j 9 }| jdkr;|d| j 9 }| jrL| 	|}| 
||\}}n| 	||\}}| |}| jd urqt| jtrq|jtjkrq|d| j 9 }| jry| |}||fS )N)r   r   rm   r   )clonerK  rG  rl   ra   rX   r   r   rA  rL  rN  rI  r   r;   rO  )rL   r   r   rP  r8   r8   r9   rQ     s8   






zOpenPanguDecoderLayer.forward)rR   rS   rT   r   rV   r
   rI   rX   rY   rQ   rZ   r8   r8   rN   r9   r6  ,  s(     r6  c                       s8  e Zd ZdZdddedef fddZdejd	ejfd
dZ		d"dejdB dejde
dB dejdB d	eje
B f
ddZdeeeeef  deeef dedejdee d	efddZdeeeeeef  deeef dedejdee deeef d	efddZdeeeejf  d	ee fddZd#d d!Z  ZS )$OpenPanguModelFr<   rC   r7  rC   c                   s   t    jj j}jj} | _|j| _ j	| _
 j| _t js* jr9t jr9t j j|| dd| _nt | _t j fdd| dd\| _| _| _t jrat j jd| _nt | _tdd	g j| _d S )
Nz.embed_tokensr@   rC   c                    s   t  | S r   )r6  rS  r\   r7  r8   r9   <lambda>  s    z)OpenPanguModel.__init__.<locals>.<lambda>z.layersrS  r   r   rP  )rH   rI   rB  rC  r@   r]   r   r\   rp   pad_token_idpadding_idx
vocab_sizer   is_first_ranktie_word_embeddingsis_last_rankr!   r=   embed_tokensr(   r,   rJ  start_layer	end_layerlayersr   r   normr+   make_empty_intermediate_tensors)rL   r7  rC   r@   r   rN   rU  r9   rI     s>   



zOpenPanguModel.__init__	input_idsrD   c                 C   s
   |  |S r   )r]  rL   rc  r8   r8   r9   embed_input_ids%  s   
zOpenPanguModel.embed_input_idsNr   intermediate_tensorsinputs_embedsc           
      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| jD ]}| j| }||||\}}q(t  jsCt||dS | 	||\}}	|S )Nr   rP  )r   rP  )
r   rZ  re  ranger^  r_  r`  r\  r1   ra  )
rL   rc  r   rf  rg  r   rP  ilayerr   r8   r8   r9   rQ   (  s"   

zOpenPanguModel.forwardattn_mlp_replace_mappingparams_dictweight_namer  loaded_paramsc                 C   s   |D ]G\}}}||vsd|v r||vrq| ||}	|dkr#|	|vr#q|	}|dr/||vr/qt|| r5q|| }
|
j}||
|| ||  dS dS )Nzmlp.experts.r   .biasTF)replaceendswithr*   r  add)rL   rk  rl  rm  r  rn  
param_nameorigin_nameshard_idweight_name_mappedr  r  r8   r8   r9   load_attn_mlp_weightF  s&   

z#OpenPanguModel.load_attn_mlp_weightexpert_merge_mapping	flag_dictc              	   C   s   |D ]B}|\}}	}
}|	|vrqd|d< | |	|}t|| rq|| }ttdtf |j}||||||
dd}|rD|}||  dS qdS )NTis_expert_weight.)ru  	expert_idreturn_successF)rp  r*   typingcastr   rW   r  rr  )rL   rx  rl  rm  r  rn  ry  mappingrs  rt  r{  ru  rv  r  r  successr8   r8   r9   load_expert_weightg  s0   	

z!OpenPanguModel.load_expert_weightweightsc              	   C   s  g d}t | jd}|rtj| ddd| jj| jd}t|  }t }|D ]\}}d|v r.q%| jj	r7d|v r7q%d	|v rft | jd
rf| jj
dkrft|dd dd }	|	| jj }
|
dkrf|
| jj
k rfq%ddi}| |||||s|r| ||||||rq%|d rq%|dr||vrq%t||}|dr|dd}|d u rq%t|| rq%|| }t|dt}||| || q%|   |S )N))r   r   r   )r   z.k_projr   )r   z.v_projr   )r   z	.q_a_projr   )r   r   rh   )rF   z
.gate_projr   )rF   z.up_projrh   rz   	gate_projrK   up_proj)ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namerc   rp   zrotary_emb.inv_freqzlm_head.weightr`  num_nextn_predict_layersr   zlayers.r   r   rz  Fro  rn   zgate.e_score_correction_biasr  )r~   r\   r   make_expert_params_mappingrz   rp   r5  named_parameterssetr[  r  rU   r   rJ  rw  r  rq  r#   rp  r*   r   r"   rr  r  )rL   r  rk  has_expertsrx  rl  rn  namer  r   mtp_idxry  r  r  r8   r8   r9   load_weights  s   		
	



zOpenPanguModel.load_weightsc                 C   s2   |   D ]\}}|| u rqt|dr|  qd S )Nr  )named_modulesr~   r  )rL   r  moduler8   r8   r9   r    s   
zOpenPanguModel.post_weight_loadr   r4  )rR   rS   rT   fall_back_to_pt_during_loadr
   rV   rI   rX   rY   re  r1   rQ   r   tuplerU   r5  r   r  rW   rw  r  r   r  r  rZ   r8   r8   rN   r9   rR    s\    &


!


$!RrR  c                       s   e Zd Zg dddgdZdddedef fd	d
ZdejdejfddZ			ddejdB dejde
dB dejdB deje
B f
ddZdejdejdB fddZdeeeejf  dee fddZ  ZS )OpenPanguModelBase)r   k_projv_projr  r  )r   rJ   r<   rS  r7  rC   c                   s   t    |jj}|j}|| _|| _t|do|jd u| _| jr'ddg| j	d< t
|t|dd| _t jrNt|j|j|t|dd| _|jrM| jjj| j_nt | _t|j| _| jj| _d S )	Nr   q_a_projr   r   modelr7  rC   lm_headrT  )rH   rI   rB  rC  r@   r\   r~   r   fuse_qkv_a_projpacked_modules_mappingrR  r-   r  r   r\  r    rY  r=   r  r[  r]  weightr(   r   logits_processorrb  )rL   r7  rC   r\   r@   rN   r8   r9   rI     s8   


zOpenPanguModelBase.__init__rc  rD   c                 C   s   | j |S r   )r  re  rd  r8   r8   r9   re    s   z"OpenPanguModelBase.embed_input_idsNr   rf  rg  c                 C   s   |  ||||}|S r   )r  )rL   rc  r   rf  rg  r   r8   r8   r9   rQ     s   zOpenPanguModelBase.forwardr   c                 C   s   |  | j|}|S r   )r  r  )rL   r   logitsr8   r8   r9   compute_logits  s   z!OpenPanguModelBase.compute_logitsr  c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r'   r\   r[  r  )rL   r  loaderr8   r8   r9   r  !  s
   
zOpenPanguModelBase.load_weights)NN)rR   rS   rT   r  r
   rV   rI   rX   rY   re  r1   rQ   r  r   r  r  r  rZ   r8   r8   rN   r9   r    s2    #

,r  c                       sB   e Zd Zdddedef fddZdeded	d
fddZ  ZS )OpenPanguMoEModelr<   rS  r7  rC   c                   s   t  j||d |jj}g | _|j|j | _d| _g | _	d }| j
jD ] }t|tr*q"t|ts1J t|jtrB|j}| j	|jj q"|d u rKtd|j| _|j| _|j| _|j| _|j| _|j| _d S )Nr  rh   z#No MOE layer found in model.layers.)rH   rI   rB  rC  expert_weightsrJ  r@  num_moe_layersnum_expert_groups
moe_layersr  r`  r   r(   r6  rI  r[   appendr   RuntimeErrorr   num_logical_expertsr   num_physical_expertsr   num_local_physical_expertsrz   r{   r   rp   )rL   r7  rC   r\   example_moerj  rN   r8   r9   rI   *  s.   
zOpenPanguMoEModel.__init__r  r  rD   Nc                 C   sh   | j |ksJ || _|| _ || j | _| jjD ]}t|jtr1|j}||_	||_
| j|_|j  qd S r   )r  r  r  rp   r  r`  r   rI  r[   r   r   r   r   update_expert_map)rL   r  r  rj  moer8   r8   r9    update_physical_experts_metadataI  s   
z2OpenPanguMoEModel.update_physical_experts_metadata)	rR   rS   rT   r
   rV   rI   rU   r  rZ   r8   r8   rN   r9   r  )  s    r  c                       s,   e Zd Zdddedef fddZ  ZS )OpenPanguEmbeddedModelr<   rS  r7  rC   c                   s   t  j||d d S )Nr  )rH   rI   )rL   r7  rC   rN   r8   r9   rI   \  s   zOpenPanguEmbeddedModel.__init__)rR   rS   rT   r
   rV   rI   rZ   r8   r8   rN   r9   r  [  s    $r  c                   @      e Zd ZdS )PanguEmbeddedForCausalLMNrR   rS   rT   r8   r8   r8   r9   r  `      r  c                   @   r  )PanguUltraMoEForCausalLMNr  r8   r8   r8   r9   r  d  r  r  c                   @   r  )PanguProMoEV2ForCausalLMNr  r8   r8   r8   r9   r  h  r  r  )]r}  collections.abcr   r   r   rX   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r	   r
   vllm.distributedr   r   r   r   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   r   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   r   +vllm.model_executor.layers.logits_processorr   vllm.model_executor.layers.mlar   r   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr    r!   -vllm.model_executor.model_loader.weight_utilsr"   r#   %vllm.model_executor.models.interfacesr$   r%   r&    vllm.model_executor.models.utilsr'   r(   r)   r*   r+   r,   r-   r.   vllm.model_executor.utilsr/   vllm.platformsr0   vllm.sequencer1   vllm.transformers_utils.configr2   vllm.v1.attention.backendr3   ,vllm.v1.attention.backends.flash_attn_diffkvr4   rV   r:   Moduler;   r[   r   r   r  r6  rR  r  r  r  r  r  r  r8   r8   r8   r9   <module>   sd    (
&   
   P gG2