o
    iAL                     @   s  d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	 d dl
mZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZmZ d dlmZ d dlmZmZmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$m%Z% d dl&m'Z'm(Z( d dl)m*Z* d dl+m,Z, ddl-m.Z.m/Z/ ddl0m1Z1m2Z2m3Z3m4Z4m5Z5m6Z6 ee7Z8G dd dej9Z:G dd dej9Z;G dd dej9Z<e	G dd dej9Z=G d d! d!ej9e.e/Z>dS )"    )Iterable)isliceN)nn)Gemma3TextConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)init_logger)
GeluAndMul)	AttentionEncoderOnlyAttention)GemmaRMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)AttentionType   )SupportsLoRA
SupportsPP)AutoWeightsLoaderextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sV   e Zd Z		ddededededB deddf fd	d
ZdejdejfddZ	  Z
S )	Gemma3MLPN hidden_sizeintermediate_sizehidden_activationquant_configprefixreturnc                    sd   t    t||gd d|| dd| _t||d|| dd| _|dkr*tdtdd	| _d S )
N   Fz.gate_up_projbiasr*   r+   z
.down_projgelu_pytorch_tanhzGemma3 uses `gelu_pytorch_tanh` as the hidden activation function. Please set `hidden_act` and `hidden_activation` to `gelu_pytorch_tanh`.tanh)approximate)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr'   r(   r)   r*   r+   	__class__ W/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/gemma3.pyr4   D   s(   
zGemma3MLP.__init__xc                 C   s*   |  |\}}| |}| |\}}|S N)r5   r8   r6   )r9   r>   gate_up_r<   r<   r=   forwardc   s   
zGemma3MLP.forward)Nr&   )__name__
__module____qualname__intstrr   r4   torchTensorrB   __classcell__r<   r<   r:   r=   r%   C   s"    r%   c                       s|   e Zd Z				ddedededededed	edB d
edB dedB deddf fddZ	de
jde
jde
jfddZ  ZS )Gemma3AttentionNr&   configr'   	num_headsnum_kv_headshead_dimmax_position_embeddingscache_configr*   attn_logits_soft_capr+   r,   c                    s  t    || _|| _t }|| _| j| dksJ | j| | _|| _| j|kr2| j| dks1J n	|| j dks;J td| j| | _	|| _
| j| j
 | _| j	| j
 | _|jd | _t|| j
| j| j|j||
 dd| _t| j| j
 ||j||
 dd| _t| j
|jd| _t| j
|jd| _t|
}|j| }|dk| _| jr|jnd }||jv r|j| }n|j}| jrtd	|jd
}t| j
||dd| _ t!|ddrt"j#}nt"j$}|t"j$krt%nt&}|| j| j
| j| j	||||	||
 dd
| _'d S )Nr   r   g      z	.qkv_projr.   z.o_projepssliding_attentiondefault)	rope_type
rope_thetaT)max_positionrope_parametersis_neox_style	is_causalz.attn)rN   rQ   r*   	attn_typelogits_soft_capper_layer_sliding_windowr+   )(r3   r4   rL   r'   r
   total_num_headsrM   total_num_kv_headsmaxrN   rO   q_sizekv_sizequery_pre_attn_scalarscalingr   attention_biasqkv_projr   o_projr   rms_norm_epsq_normk_normr    layer_types
is_slidingsliding_windowrZ   dictrope_local_base_freqr   
rotary_embgetattrr   DECODERENCODER_ONLYr   r   attn)r9   rL   r'   rM   rN   rO   rP   rQ   r*   rR   r+   tp_size	layer_idx
layer_typero   rZ   r]   attn_clsr:   r<   r=   r4   k   s   

	




zGemma3Attention.__init__	positionshidden_statesc                 K   s   |  |\}}|j| j| j| jgdd\}}}|d| j| jf}| |}|dd}|d| j	| jf}| 
|}|dd}| |||\}}| |||}	| |	\}
}|
S )N)dim)rh   splitrc   rd   	unflattenrM   rO   rk   flattenrN   rl   rr   rv   ri   )r9   r{   r|   kwargsqkvrA   qkvattn_outputoutputr<   r<   r=   rB      s    

zGemma3Attention.forward)NNNr&   )rC   rD   rE   r   rF   r   r   floatrG   r4   rH   rI   rB   rJ   r<   r<   r:   r=   rK   j   sF    		
irK   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )Gemma3DecoderLayerNr&   rL   rQ   r*   r+   r,   c                    s   t    |j| _t|| j|j|j|j|j||d | dd
| _|j| _t	| j|j
|j|| dd| _t|j|jd| _t|j|jd| _t|j|jd| _t|j|jd| _d S )Nz
.self_attn)
rL   r'   rM   rN   rO   rP   rQ   r*   rR   r+   z.mlp)r'   r(   r)   r*   r+   rS   )r3   r4   r'   rK   num_attention_headsnum_key_value_headsrO   rP   	self_attnr%   r(   r)   mlpr   rj   input_layernormpost_attention_layernormpre_feedforward_layernormpost_feedforward_layernorm)r9   rL   rQ   r*   r+   r:   r<   r=   r4      s@   
zGemma3DecoderLayer.__init__r{   r|   residualc                 K   st   |d u r|}|  |}n|  ||\}}| jd||d|}| |}| ||\}}| |}| |}||fS )N)r{   r|   r<   )r   r   r   r   r   r   )r9   r{   r|   r   r   r<   r<   r=   rB     s"   


zGemma3DecoderLayer.forward)NNr&   )rC   rD   rE   r   r   r   rG   r4   rH   rI   tuplerB   rJ   r<   r<   r:   r=   r      s0    (r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )Gemma3Modelr&   r+   vllm_configr+   c                   s   t    |jj|j |j| _| _tjj	| dd| _
tj fdd| dd\| _| _| _tj	jd| _| jj	d }| jd	t|d
d tddgj	| _d S )Nz.embed_tokensr*   r+   c                    s   t  | dS )Nr   )r   r   rQ   rL   r*   r<   r=   <lambda>@  s    z&Gemma3Model.__init__.<locals>.<lambda>z.layersr   rS   g      ?
normalizerF)
persistentr|   r   )r3   r4   model_config	hf_configrQ   r*   rL   r   
vocab_sizer'   embed_tokensr#   num_hidden_layersstart_layer	end_layerlayersr   rj   normregister_bufferrH   tensorr"   make_empty_intermediate_tensors)r9   r   r+   r   r:   r   r=   r4   0  s.   


zGemma3Model.__init__	input_idsr,   c                 C   s   |  || j S r?   )r   r   r9   r   r<   r<   r=   embed_input_idsQ  s   zGemma3Model.embed_input_idsNr{   intermediate_tensorsinputs_embedsc           
      K   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]}||||fi |\}}q*t  jsDt||dS | 	||\}}	|S )Nr|   r   )r|   r   )
r	   is_first_rankr   r   r   r   r   is_last_rankr   r   )
r9   r   r{   r   r   r   r|   r   layerrA   r<   r<   r=   rB   V  s,   
zGemma3Model.forwardweightsc                 C   s  g d}t |  }t }|D ]\}}| jr&| j dkr&|dr&|d8 }| jd urL| j| }rL|| }t|dt}	|d }|	|| |	| q|drst
||}
|
d urs|
|v rs||
 }t|dt}	|	|| |	|
 q|D ].\}}}||vrqu|||}|dr||vrqut|| rqu|| }|j}	|	|||  n)|dr||vrqt
||}|d u rqt|| rq|| }t|dt}	|	|| |	| q|S )	N))rh   q_projr   )rh   k_projr   )rh   v_projr   )r5   	gate_projr   )r5   up_projr   ggufznorm.weightr   weight_loaderr   )z.k_scalez.v_scalez.q_scalez.prob_scalez.bias)rp   named_parameterssetr*   get_nameendswithget_cache_scalers   r   addr   replacer!   r   )r9   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   remapped_name
param_name
shard_nameshard_idr<   r<   r=   load_weightsv  sj   










zGemma3Model.load_weightsr?   )rC   rD   rE   r   rG   r4   rH   rI   r   r   rB   r   r   r   r   rJ   r<   r<   r:   r=   r   .  s     !

, r   c                       s   e Zd Zg dddgdZdddedef fd	d
ZdejdejfddZ			ddejdB dejde
dB dejdB deje
B f
ddZdejdejdB fddZdeeeejf  dee fddZ  ZS )Gemma3ForCausalLM)r   r   r   r   r   )rh   r5   r&   r   r   r+   c                   s   |j j}|j}t   || _|| _t|t|dd| _t	|j
|j|t|dd| _|jr6| j| jj| _t|j
|jd| _| jj| _d S )Nmodel)r   r+   lm_headr   )soft_cap)r   r   r*   r3   r4   rL   r   r$   r   r   r   r'   r   tie_word_embeddingstie_weightsr   r   final_logit_softcappinglogits_processorr   )r9   r   r+   rL   r*   r:   r<   r=   r4     s*   

zGemma3ForCausalLM.__init__r   r,   c                 C   s   | j |S r?   )r   r   r   r<   r<   r=   r     s   z!Gemma3ForCausalLM.embed_input_idsNr{   r   r   c                 K   s   | j ||||fi |}|S r?   )r   )r9   r   r{   r   r   r   r|   r<   r<   r=   rB     s   zGemma3ForCausalLM.forwardr|   c                 C   s   |  | j|}|S r?   )r   r   )r9   r|   logitsr<   r<   r=   compute_logits  s   z Gemma3ForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rL   r   r   )r9   r   loaderr<   r<   r=   r     s
   
zGemma3ForCausalLM.load_weights)NN)rC   rD   rE   packed_modules_mappingr   rG   r4   rH   rI   r   r   rB   r   r   r   r   r   rJ   r<   r<   r:   r=   r     s6    

,r   )?collections.abcr   	itertoolsr   rH   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   r   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.v1.attention.backendr   
interfacesr   r   utilsr   r    r!   r"   r#   r$   rC   loggerModuler%   rK   r   r   r   r<   r<   r<   r=   <module>   s>    	' D 