o
    پi6                     @   s>  d Z ddlmZmZmZmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZ dd	lmZmZmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z% G dd dej&Z'G dd dej&Z(G dd dej&Z)G dd dej&Z*G dd dej&Z+e+Z,dS )z@Inference-only XVERSE model compatible with HuggingFace weights.    )AnyDictIterableOptionalTupleN)nn)LlamaConfig)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sH   e Zd Z		ddedededee deddf fd	d
Zdd Z  Z	S )	XverseMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	N   Fgate_up_projbiasr   r   	down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r!   r   r$   
ValueErrorr
   act_fn)selfr   r   r   r   r   	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/xverse.pyr'   /   s(   

zXverseMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r!   r)   r$   )r*   xgate_up_r-   r-   r.   forwardM   s   
zXverseMLP.forwardNr   )
__name__
__module____qualname__intstrr   r   r'   r3   __classcell__r-   r-   r+   r.   r   .   s"    r   c                       s   e Zd Z							ddeded	ed
edededeeee	f  de
dedee deddf fddZdejdejdedejfddZ  ZS )XverseAttentionr   '  NT    r   configr   	num_headsnum_kv_headslayer_id
rope_thetarope_scalingrope_is_neox_stylemax_position_embeddingsr   r   r   c              
      sV  t    || _t }|| _| j| dksJ | j| | _|| _| j|kr/| j| dks.J n	|| j dks8J td| j| | _t	|d| j| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _|	| _t|| j
| j| jd|
td|d| _t| j| j
 |d|
td|d| _t| j
| j
|	|||d	| _t| j| j
| j| j||
td
|d| _d S )Nr      head_dimg      Fqkv_projr"   o_proj)
rotary_dimmax_positionbaserC   is_neox_styleattn)r@   rA   r   r   )r&   r'   r   r	   total_num_headsr?   total_num_kv_headsmaxr@   getattrrG   q_sizekv_sizescalingrB   rE   r   r   rH   r   rI   r   
rotary_embr   rN   )r*   r>   r   r?   r@   rA   rB   rC   rD   rE   r   r   tp_sizer+   r-   r.   r'   U   sh   

	
zXverseAttention.__init__	positionshidden_statesforward_batchc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )N)dim)rH   splitrS   rT   rV   rN   rI   )r*   rX   rY   rZ   qkvr2   qkvattn_outputoutputr-   r-   r.   r3      s    zXverseAttention.forward)r   r<   NTr=   Nr   )r5   r6   r7   r   r8   floatr   r   r9   r   boolr   r'   torchTensorr   r3   r:   r-   r-   r+   r.   r;   T   sT    	
Kr;   c                       sr   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )XverseDecoderLayerr   Nr   r>   rA   r   r   r   c           
         s   t    |j| _t|dd}t|dd }|d ur$t|dd r$|j|d< t|dd}t|dd}t|d	|j}	t|| j|j|	||||||td
|d| _t	| j|j
|j|td|d| _t|j|jd| _t|j|jd| _d S )NrB   r<   rC    original_max_position_embeddingsrD   TrE   r=   num_key_value_heads	self_attn)r>   r   r?   r@   rA   rB   rC   rD   rE   r   r   mlp)r   r   r   r   r   eps)r&   r'   r   rR   ri   num_attention_headsr;   r   rk   r   r   r   rl   r   rms_norm_epsinput_layernormpost_attention_layernorm)
r*   r>   rA   r   r   rB   rC   rD   rE   r@   r+   r-   r.   r'      sL   

zXverseDecoderLayer.__init__rX   rY   rZ   residualc                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rX   rY   rZ   )rq   rk   rr   rl   )r*   rX   rY   rZ   rs   r-   r-   r.   r3      s   
zXverseDecoderLayer.forward)r   Nr   )r5   r6   r7   r   r8   r   r   r9   r'   rf   rg   r   r   r3   r:   r-   r-   r+   r.   rh      s4    /rh   c                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )XverseModelNr   r>   r   r   r   c                    st   t     | _ j| _ j| _t j jtdd| _	t
 fddt jD | _t j jd| _d S )Nembed_tokensr   c              	      s(   g | ]}t  |td | dqS )zlayers.r   r   )rh   r   ).0ir>   r   r   r-   r.   
<listcomp>  s    z(XverseModel.__init__.<locals>.<listcomp>rm   )r&   r'   r>   pad_token_idpadding_idx
vocab_sizer   r   r   ru   r   
ModuleListrangenum_hidden_layerslayersr   rp   normr*   r>   r   r   r+   rz   r.   r'      s   
zXverseModel.__init__	input_idsrX   rZ   input_embedsc           
      C   s`   |d u r
|  |}n|}d }tt| jD ]}| j| }|||||\}}q| ||\}}	|S r/   )ru   r   lenr   r   )
r*   r   rX   rZ   r   rY   rs   ry   layerr2   r-   r-   r.   r3     s   

zXverseModel.forwardr4   r/   )r5   r6   r7   r   r   r   r9   r'   rf   rg   r   r3   r:   r-   r-   r+   r.   rt      s0    !rt   c                       s   e Zd Z		ddedee deddf fddZe	 	dd	ej
d
ej
dedej
dej
f
ddZ	ddeeeej
f  fddZ  ZS )XverseForCausalLMNr   r>   r   r   r   c                    sT   t    || _|| _t||td|d| _t|j|j	td|d| _
t|| _d S )Nmodelrw   lm_headrv   )r&   r'   r>   r   rt   r   r   r   r~   r   r   r   logits_processorr   r+   r-   r.   r'   .  s   
zXverseForCausalLM.__init__r   rX   rZ   r   c                 C   s"   |  ||||}| ||| j|S r/   )r   r   r   )r*   r   rX   rZ   r   rY   r-   r-   r.   r3   ?  s   
zXverseForCausalLM.forwardweightsc                    s\   g dt |    fdd}|d u s|d u r'|D ]	\}}||| qd S ||| d S )N))rH   q_projr_   )rH   k_projr`   )rH   v_projra   )r!   	gate_projr   )r!   up_projrF   c                    s   d| v sd| v r
d S d| v sd| v rd S D ]3\}}}|| vr q|  ||} | dr0|  vr0q| dr:|  vr:q |  }|j}||||  d S | drU|  vrUd S | dr`|  vr`d S  |  }t|dt}||| d S )Nzrotary_emb.inv_freq	projectorzrotary_emb.cos_cachedzrotary_emb.sin_cachedz.biaszmodel.vision_towerweight_loader)replaceendswith
startswithr   rR   r   )nameloaded_weight
param_nameweight_nameshard_idparamr   params_dictstacked_params_mappingr-   r.   load_weights_per_paramY  s.   z>XverseForCausalLM.load_weights.<locals>.load_weights_per_param)dictnamed_parameters)r*   r   r   r   r   r-   r   r.   load_weightsL  s   zXverseForCausalLM.load_weightsr4   r/   )NN)r5   r6   r7   r   r   r   r9   r'   rf   no_gradrg   r   r3   r   r   r   r:   r-   r-   r+   r.   r   -  s:    r   )-__doc__typingr   r   r   r   r   rf   r   transformersr   sglang.srt.distributedr	   sglang.srt.layers.activationr
   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   &sglang.srt.model_executor.model_runnerr   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r;   rh   rt   r   
EntryClassr-   r-   r-   r.   <module>   s.   &ZI6Q