o
    پi5                     @   s6  d Z ddlmZmZmZmZmZ ddlZddlmZ ddl	m
Z
 ddlmZ ddlmZ ddlmZmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZmZ ddlm Z  ddl!m"Z" ddl#m$Z$ G dd dej%Z&G dd dej%Z'G dd dej%Z(G dd dej%Z)G dd dej%Z*e*Z+dS )z@Inference-only Exaone model compatible with HuggingFace weights.    )AnyDictIterableOptionalTupleN)nn)$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixc                       sH   e Zd Z		ddedededee deddf fd	d
Zdd Z  Z	S )ExaoneGatedMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|td|d| _t||d|td|d| _|dkr.td| dt | _	d S )	N   Fgate_up_projbiasr   r   c_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r   r!   r   r$   
ValueErrorr	   act_fn)selfr   r   r   r   r   	__class__ L/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/exaone.pyr'   .   s(   

zExaoneGatedMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r!   r)   r$   )r*   xgate_up_r-   r-   r.   forwardL   s   
zExaoneGatedMLP.forwardNr   )
__name__
__module____qualname__intstrr   r   r'   r3   __classcell__r-   r-   r+   r.   r   -   s"    r   c                       s   e Zd Z							ddeded	ed
ededeeeef  de	dedee
 deddf fddZdejdejdedejfddZ  ZS )ExaoneAttentionr     NT   r   r   	num_headsnum_kv_headslayer_id
rope_thetarope_scalingrope_is_neox_stylemax_position_embeddingsr   r   r   c              
      sf  t    || _t }|| _| j| dksJ | j| | _|| _| j|kr/| j| dks.J n	|| j dks8J td| j| | _t	|d| j| j | _
t| j
t	|dd | _| j| j
 | _| j| j
 | _| j
d | _|| _|	| _t|| j
| j| jd|
td|d| _t| j| j
 |d|
td	|d| _t| j
| j|	|||d
| _t| j| j
| j| j||
d| _d S )Nr      head_dimpartial_rotary_factorg      Fqkv_projr"   out_proj)
rotary_dimmax_positionbaserB   is_neox_style)r?   r@   r   )r&   r'   r   r   total_num_headsr>   total_num_kv_headsmaxr?   getattrrF   r8   rJ   q_sizekv_sizescalingrA   rD   r   r   rH   r   rI   r   
rotary_embr   attn)r*   configr   r>   r?   r@   rA   rB   rC   rD   r   r   tp_sizer+   r-   r.   r'   T   sl   

	
zExaoneAttention.__init__	positionshidden_statesforward_batchc                 C   sb   |  |\}}|j| j| j| jgdd\}}}| |||\}}| ||||}	| |	\}
}|
S )N)dim)rH   splitrR   rS   rU   rV   rI   )r*   rY   rZ   r[   qkvr2   qkvattn_outputoutputr-   r-   r.   r3      s    zExaoneAttention.forward)r   r<   NTr=   Nr   )r5   r6   r7   r8   floatr   r   r9   r   boolr   r'   torchTensorr   r3   r:   r-   r-   r+   r.   r;   S   sP    	
Mr;   c                       sn   e Zd Z			ddedee deddf fdd	Zd
ej	dej	de
deej	 deej	ej	f f
ddZ  ZS )ExaoneDecoderLayerr   Nr   r@   r   r   r   c           
         s   t    |j| _t|dd}t|dd }|d ur$t|dd r$|j|d< t|dd}t|dd}t|| j|j|j||||||td	|d
| _	t
| j|j|j|td|d| _|j}	t|j|	d| _t|j|	d| _d S )NrA   r<   rB    original_max_position_embeddingsrC   TrD   r=   	self_attn)rW   r   r>   r?   r@   rA   rB   rC   rD   r   r   mlp)r   r   r   r   r   eps)r&   r'   r   rQ   rj   r;   num_attention_headsnum_key_value_headsr   rk   r   r   activation_functionrl   layer_norm_epsilonr
   ln_1ln_2)
r*   rW   r@   r   r   rA   rB   rC   rD   rms_norm_epsr+   r-   r.   r'      sD   

zExaoneDecoderLayer.__init__rY   rZ   r[   residualc                 C   sZ   |d u r|}|  |}n|  ||\}}| j|||d}| ||\}}| |}||fS )N)rY   rZ   r[   )rs   rk   rt   rl   )r*   rY   rZ   r[   rv   r-   r-   r.   r3      s   
zExaoneDecoderLayer.forward)r   Nr   )r5   r6   r7   r8   r   r   r9   r'   rg   rh   r   r   r3   r:   r-   r-   r+   r.   ri      s0    +ri   c                       s^   e Zd Z		ddee deddf fddZ	ddejd	ejd
e	dejdejf
ddZ
  ZS )ExaoneModelNr   r   r   r   c                    sn   t     | _ j| _ j| _t j j| _t	
 fddt jD | _ j}t j|d| _d S )Nc              	      s(   g | ]}t  |td | dqS )zh.r   r   )ri   r   ).0irW   r   r   r-   r.   
<listcomp>  s    z(ExaoneModel.__init__.<locals>.<listcomp>rm   )r&   r'   rW   pad_token_idpadding_idx
vocab_sizer   r   wter   
ModuleListrangenum_hidden_layershrr   r
   ln_f)r*   rW   r   r   ru   r+   r{   r.   r'      s   
zExaoneModel.__init__	input_idsrY   r[   input_embedsc           
      C   s`   |d u r
|  |}n|}d }tt| jD ]}| j| }|||||\}}q| ||\}}	|S r/   )r   r   lenr   r   )
r*   r   rY   r[   r   rZ   rv   rz   layerr2   r-   r-   r.   r3     s   

zExaoneModel.forwardr4   r/   )r5   r6   r7   r   r   r9   r'   rg   rh   r   r3   r:   r-   r-   r+   r.   rw      s,    !rw   c                       s   e Zd Z		ddee deddf fddZe 	ddej	d	ej	d
e
dej	def
ddZdeeeej	f  fddZ  ZS )ExaoneForCausalLMNr   r   r   r   c                    sh   t    || _|| _t||td|d| _| jjr | jj| _	nt
|j|jtd|d| _	t|| _d S )Ntransformerrx   lm_head)r   )r&   r'   rW   r   rw   r   r   tie_word_embeddingsr   r   r   r   r   r   logits_processor)r*   rW   r   r   r+   r-   r.   r'   *  s   
zExaoneForCausalLM.__init__r   rY   r[   r   c                 C   s"   |  ||||}| ||| j|S r/   )r   r   r   )r*   r   rY   r[   r   rZ   r-   r-   r.   r3   @  s   
zExaoneForCausalLM.forwardweightsc                 C   s   g d}t |  }|D ]j\}}d|v sd|v rqd|v s!d|v r"q|dr,||vr,q|dd}|D ](\}}}||vr>q4|||}|d	rN||vrNq4|| }	|	j}
|
|	||  n|d	rg||vrgq|| }	t|	d
t}
|
|	| qd S )N))rH   q_projr`   )rH   k_projra   )rH   v_projrb   )r!   c_fc_0r   )r!   c_fc_1rE   zrotary_emb.inv_freq	projectorzrotary_emb.cos_cachedzrotary_emb.sin_cachedzmodel.vision_towerzattn.attentionrk   z.biasweight_loader)dictnamed_parameters
startswithreplaceendswithr   rQ   r   )r*   r   stacked_params_mappingparams_dictnameloaded_weight
param_nameweight_nameshard_idparamr   r-   r-   r.   load_weightsO  s6   
zExaoneForCausalLM.load_weightsr4   r/   )r5   r6   r7   r   r   r9   r'   rg   no_gradrh   r   r   r3   r   r   r   r:   r-   r-   r+   r.   r   )  s0    $r   ),__doc__typingr   r   r   r   r   rg   r   sglang.srt.distributedr   sglang.srt.layers.activationr	   sglang.srt.layers.layernormr
   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   Moduler   r;   ri   rw   r   
EntryClassr-   r-   r-   r.   <module>   s,   &\E5P