o
    پi72                     @   s4  d Z ddlmZmZmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlmZ ddlmZ ddl m!Z!m"Z" e" Z#G dd dej$Z%G dd dej$Z&G dd dej$Z'G dd dej$Z(G dd dej$Z)e)Z*dS )z{
Inference-only StableLM-2 (https://huggingface.co/stabilityai/stablelm-2-1_6b)
model compatible with HuggingFace weights.
    )IterableOptionalTupleN)nn)PretrainedConfig)$get_tensor_model_parallel_world_size)
SiluAndMul)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixis_npuc                	       sN   e Zd Z		ddedee deddf fddZd	ej	dej	fd
dZ
  ZS )StablelmMLPN configquant_configprefixreturnc                    sn   t    || _|j| _|j| _t|j|jgd d|td|d| _t|j|jd|td|d| _	t
 | _d S )N   Fgate_up_projbiasr   r   	down_proj)super__init__r   hidden_sizeintermediate_sizer	   r   r   r   r    r   act_fnselfr   r   r   	__class__ N/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/stablelm.pyr"   3   s&   

zStablelmMLP.__init__xc                 C   s*   |  |\}}| |}| |\}}|S N)r   r%   r    )r'   r,   gate_up_r*   r*   r+   forwardM   s   
zStablelmMLP.forwardNr   )__name__
__module____qualname__r   r   r   strr"   torchTensorr0   __classcell__r*   r*   r(   r+   r   2   s    r   c                       s^   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dede	j
fddZ  ZS )StablelmAttentionr   Nr   r   layer_idr   r   r   c              
      s  t    || _|j| _t }|j| _| j| | _|j| _	| j	|kr,| j	| dks+J n	|| j	 dks5J t
d| j	| | _| j| j | _|j| _t|dt|dd}t| j| | _| jd | _| j| j | _| j| j | _t|dd| _| j| j | | jkrtd| j d	| j d
t| j| j| j| j	| j|td|d| _t| j| j | jd|td|d| _tst| j| j| jj| jjd| _nt| j| j| jj| jjtjd| _t | j| j| j| j||td|d| _!d S )Nr      rope_pctpartial_rotary_factorg      use_qkv_biasFz?hidden_size must be divisible by num_heads (got `hidden_size`: z and `num_heads`: z).qkv_projr   r   o_projr   )
rotary_dimmax_positionbase)rB   rC   rD   dtypeattn)num_kv_headsr:   r   r   )"r!   r"   r   r#   r   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_key_value_headsmaxhead_dimmax_position_embeddingsgetattrintrotary_ndimsscalingq_sizekv_sizeqkv_bias
ValueErrorr
   r   r?   r   rA   _is_npur   
rope_theta
rotary_embr6   float32r   rF   )r'   r   r:   r   r   tp_sizer<   r(   r*   r+   r"   U   s   

	

zStablelmAttention.__init__	positionshidden_statesforward_batchc                 C   s   |  |\}}|j| j| j| jgdd\}}}ts#| |||\}}n|j}	| ||tj	|tj	\}}||	||	}}| 
||||}
| |
\}}|S )N)dim)r?   splitrT   rU   rX   rZ   rE   tor6   r[   rF   rA   )r'   r]   r^   r_   qkvr/   qkvodtypeattn_outputoutputr*   r*   r+   r0      s    "zStablelmAttention.forwardr   Nr   )r2   r3   r4   r   rQ   r   r   r5   r"   r6   r7   r   r0   r8   r*   r*   r(   r+   r9   T   s0    Rr9   c                       sh   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
e	j
f fddZ  ZS )StablelmDecoderLayerr   Nr   r   r:   r   r   r   c                    sr   t    t||td|d| _t||td|d| _t|dt|dd}tj	|j
|d| _tj	|j
|d| _d S )	N	self_attn)r:   r   mlpr@   norm_epslayer_norm_epsh㈵>eps)r!   r"   r9   r   rm   r   rn   rP   r   	LayerNormr#   input_layernormpost_attention_layernorm)r'   r   r:   r   r   ro   r(   r*   r+   r"      s   
zStablelmDecoderLayer.__init__r]   r^   r_   c                 C   sN   |}|  |}| j|||d}|| }|}| |}| |}|| }||fS )N)r]   r^   r_   )ru   rm   rv   rn   )r'   r]   r^   r_   residualr*   r*   r+   r0      s   


zStablelmDecoderLayer.forwardrk   )r2   r3   r4   r   rQ   r   r   r5   r"   r6   r7   r   r   r0   r8   r*   r*   r(   r+   rl      s0    rl   c                       sb   e Zd Z		ddedee deddf fddZ	dd	ej	d
ej	de
dej	dej	f
ddZ  ZS )StableLMEpochModelNr   r   r   r   r   c                    sr   t    t j jtdd| _t fddt	 j
D | _t dt dd}tj j|d| _d S )	Nembed_tokensr   c              	      s(   g | ]}t  |td | dqS )zlayers.r@   )rl   r   ).0ir   r   r   r*   r+   
<listcomp>   s    z/StableLMEpochModel.__init__.<locals>.<listcomp>ro   rp   rq   rr   )r!   r"   r   
vocab_sizer#   r   ry   r   
ModuleListrangenum_hidden_layerslayersrP   rt   norm)r'   r   r   r   ro   r(   r}   r+   r"      s   
zStableLMEpochModel.__init__	input_idsr]   r_   input_embedsc           	      C   sT   |d u r
|  |}n|}tt| jD ]}| j| }||||\}}q| |}|S r-   )ry   r   lenr   r   )	r'   r   r]   r_   r   r^   r|   layerrw   r*   r*   r+   r0     s   


zStableLMEpochModel.forwardr1   r-   )r2   r3   r4   r   r   r   r5   r"   r6   r7   r   r0   r8   r*   r*   r(   r+   rx      s0    rx   c                       s   e Zd Z		ddedee deddf fddZe	 	dd	ej
d
ej
dedej
dej
f
ddZdeeeej
f  fddZ  ZS )StableLmForCausalLMNr   r   r   r   r   c                    sT   t    || _|| _t||td|d| _t|j|j	td|d| _
t|| _d S )Nmodelr@   lm_headrz   )r!   r"   r   r   rx   r   r   r   r   r#   r   r   logits_processorr&   r(   r*   r+   r"     s   
zStableLmForCausalLM.__init__r   r]   r_   r   c                 C   s"   |  ||||}| ||| j|S r-   )r   r   r   )r'   r   r]   r_   r   r^   r*   r*   r+   r0   )  s   
zStableLmForCausalLM.forwardweightsc                 C   s   g d}t |  }|D ]V\}}d|v rqd|v sd|v rq|D ](\}}}||vr*q |||}|dr:||vr:q || }	|	j}
|
|	||  n|drS||vrSq|| }	t|	dt}
|
|	| qd S )N))r?   q_projre   )r?   k_projrf   )r?   v_projrg   )r   	gate_projr   )r   up_projr;   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedz.biasweight_loader)dictnamed_parametersreplaceendswithr   rP   r   )r'   r   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   r*   r*   r+   load_weights6  s0   
z StableLmForCausalLM.load_weightsr1   r-   )r2   r3   r4   r   r   r   r5   r"   r6   no_gradr7   r   r0   r   r   r   r8   r*   r*   r(   r+   r     s4    $r   )+__doc__typingr   r   r   r6   r   transformersr   sglang.srt.distributedr   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   rX   Moduler   r9   rl   rx   r   
EntryClassr*   r*   r*   r+   <module>   s.   "f,1D