o
    پid7                     @   s2  d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z" G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(e(Z)dS )z?Inference-only Gemma model compatible with HuggingFace weights.    )IterableOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)
GeluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)VocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sD   e Zd Z		ddededee deddf
 fdd	Zd
d Z  Z	S )GemmaMLPN hidden_sizeintermediate_sizequant_configprefixreturnc                    sR   t    t||gd d|td|d| _t||d|td|d| _td| _d S )N   Fgate_up_projbiasr   r   	down_projnone)	super__init__r
   r   r   r   r    r   act_fn)selfr   r   r   r   	__class__ K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/gemma.pyr#   ,   s    
zGemmaMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r   r$   r    )r%   xgate_up_r(   r(   r)   forwardD   s   
zGemmaMLP.forwardNr   )
__name__
__module____qualname__intr   r   strr#   r.   __classcell__r(   r(   r&   r)   r   +   s    r   c                       sv   e Zd Z					ddededed	ed
edededee deddf fddZde	j
de	j
dede	j
fddZ  ZS )GemmaAttentionr       '  Nr   r   	num_headsnum_kv_headshead_dimlayer_idmax_position_embeddings
rope_thetar   r   r   c
              
      s@  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _|| _	| j| j	 | _
| j| j	 | _| j	d | _|| _t|| j	| j| jd|td|	d| _t| j| j	 |d|td|	d| _t| j	| j	|| jdd	| _t| j| j	| j| j||td
|	d| _d S )Nr      g      Fqkv_projr   o_projT)
rotary_dimmax_positionbaseis_neox_styleattn)r:   r<   r   r   )r"   r#   r   r   total_num_headsr9   total_num_kv_headsmaxr:   r;   q_sizekv_sizescalingr>   r   r   r@   r   rA   r   
rotary_embr   rF   )r%   r   r9   r:   r;   r<   r=   r>   r   r   tp_sizer&   r(   r)   r#   L   s`   

	
zGemmaAttention.__init__	positionshidden_statesforward_batchc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )N)dim)r@   splitrJ   rK   rM   rF   rA   )r%   rO   rP   rQ   qkvr-   qkvattn_outputoutputr(   r(   r)   r.      s    zGemmaAttention.forward)r   r7   r8   Nr   )r0   r1   r2   r3   floatr   r   r4   r#   torchTensorr   r.   r5   r(   r(   r&   r)   r6   K   sH    	
Dr6   c                       sr   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )GemmaDecoderLayerr   Nr   configr<   r   r   r   c                    s   t    |j| _t| j|j|j|j||j|j|t	d|d	| _
t| j|j|t	d|d| _t|j|jd| _t|j|jd| _d S )N	self_attn)	r   r9   r:   r;   r<   r=   r>   r   r   mlp)r   r   r   r   eps)r"   r#   r   r6   num_attention_headsnum_key_value_headsr;   r=   r>   r   r`   r   r   ra   r	   rms_norm_epsinput_layernormpost_attention_layernorm)r%   r_   r<   r   r   r&   r(   r)   r#      s.   
zGemmaDecoderLayer.__init__rO   rP   rQ   residualc                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rO   rP   rQ   )rg   r`   rh   ra   )r%   rO   rP   rQ   ri   r(   r(   r)   r.      s   
zGemmaDecoderLayer.forward)r   Nr   )r0   r1   r2   r   r3   r   r   r4   r#   r\   r]   r   r   r.   r5   r(   r(   r&   r)   r^      s4    r^   c                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )
GemmaModelNr   r_   r   r   r   c                    sZ   t     | _t j j| _t fddt	 j
D | _t j jd| _d S )Nc              	      s(   g | ]}t  |td | dqS )zlayers.r   r   )r^   r   ).0ir_   r   r   r(   r)   
<listcomp>   s    z'GemmaModel.__init__.<locals>.<listcomp>rb   )r"   r#   r_   r   
vocab_sizer   embed_tokensr   
ModuleListrangenum_hidden_layerslayersr	   rf   normr%   r_   r   r   r&   rn   r)   r#      s   
zGemmaModel.__init__	input_idsrO   rQ   input_embedsc           
      C   sp   |d u r
|  |}n|}|| jjd 9 }d }tt| jD ]}| j| }|||||\}}q| ||\}}	|S )N      ?)rq   r_   r   rs   lenru   rv   )
r%   rx   rO   rQ   ry   rP   ri   rm   layerr-   r(   r(   r)   r.      s   

zGemmaModel.forwardr/   r*   )r0   r1   r2   r   r   r   r4   r#   r\   r]   r   r.   r5   r(   r(   r&   r)   rj      s0    rj   c                       s   e Zd Zg dddgdZg dZi Zg Z		dded	ee	 d
e
ddf fddZe 	ddejdejdedejdejf
ddZe 	ddejdejdedeeef dejf
ddZdeee
ejf  fddZ  ZS )GemmaForCausalLM)q_projk_projv_proj	gate_projup_proj)r@   r   )r@   rA   r   r    Nr   r_   r   r   r   c                    s:   t    || _|| _t||td|d| _t|| _d S )Nmodelrk   )	r"   r#   r_   r   rj   r   r   r   logits_processorrw   r&   r(   r)   r#   &  s   
zGemmaForCausalLM.__init__rx   rO   rQ   ry   c                 C   s$   |  ||||}| ||| j j|S r*   )r   r   rq   )r%   rx   rO   rQ   ry   rP   r(   r(   r)   r.   4  s   zGemmaForCausalLM.forwardsplit_intervalc                 C   s   |\}}|dkr#|d u r| j ||_n||_| j| j jjd 9  _t||D ]}| j j| }	|	||j||j\|_|_q(|| j jjkr^| j 	|j|j\|_}
| 
||j| j j|}|S d }|S )Nr   rz   )r   rq   rP   r_   r   rs   ru   ri   rt   rv   r   )r%   rx   rO   rQ   r   ry   startendrm   r|   r-   resultr(   r(   r)   forward_split_prefillA  s6   	
	z&GemmaForCausalLM.forward_split_prefillweightsc                 C   s   g d}t |  }t }|D ]Z\}}|D ](\}}}	||vrq|||}|dr/||vr/q|| }
|
j}||
||	  n&d|v rCq|drM||vrMqd|v rU|d7 }|| }
t|
dt}||
| || qd S )N))r@   r~   rV   )r@   r   rW   )r@   r   rX   )r   r   r   )r   r   r?   z.biaszlm_head.weightznorm.weightg      ?weight_loader)	dictnamed_parameterssetreplaceendswithr   getattrr   add)r%   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_name
shard_nameshard_idparamr   r(   r(   r)   load_weightsq  s2   
zGemmaForCausalLM.load_weightsr/   r*   )r0   r1   r2   packed_modules_mappingsupported_lora_modulesembedding_modulesembedding_padding_modulesr   r   r   r4   r#   r\   no_gradr]   r   r.   r   r3   r   r   r   r5   r(   r(   r&   r)   r}     s^    
$/r}   )*__doc__typingr   r   r   r\   r   transformersr   sglang.srt.distributedr   sglang.srt.layers.activationr   sglang.srt.layers.layernormr	   sglang.srt.layers.linearr
   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r6   r^   rj   r}   
EntryClassr(   r(   r(   r)   <module>   s0    S97 