o
    پid                     @   s  d dl mZ d dlmZmZmZmZmZ d dlZd dlm	Z	 d dl
mZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlmZmZ d dlmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z(m)Z) d dl*m+Z+m,Z, d dl-m.Z.m/Z/ d dl0m1Z1m2Z2 d dl3m4Z4 d dl5m6Z6m7Z7 d dl8m9Z9m:Z: dd Z;G dd de	j<Z=G dd de	j<Z>G dd de	j<Z?G dd  d e	j<Z@G d!d" d"e	j<ZAeAZBdS )#    )Iterable)AnyListOptionalTupleUnionN)nn)Exaone4Config)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)get_attention_tp_rankget_attention_tp_sizeget_local_attention_dp_size)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)PoolerPoolingType)QuantizationConfig)RadixAttention)get_rope)PPMissingLayerget_layer_id)ParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loadermaybe_remap_kv_scale_name)get_global_server_args)
add_prefixmake_layers)get_exception_tracebackloggerc                 C   s   t | dd d ur| jd S d S )Nsliding_window   )getattrr(   )config r,   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/exaone4.py!get_attention_sliding_window_size+   s   
r.   c                       sN   e Zd Z			ddedededee ded	ed
df fddZdd Z	  Z
S )Exaone4GatedMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    sh   t    t||gd ||td|d| _t||||td|d| _|dkr.td| dt | _	d S )N   gate_up_proj)r5   r4   r6   	down_projsiluzUnsupported activation: z!. Only silu is supported for now.)
super__init__r   r$   r9   r   r:   
ValueErrorr   act_fn)selfr1   r2   r3   r4   r5   r6   	__class__r,   r-   r=   3   s(   
	
zExaone4GatedMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r9   r?   r:   )r@   xgate_up_r,   r,   r-   forwardR   s   
zExaone4GatedMLP.forward)NFr0   )__name__
__module____qualname__intstrr   r   boolr=   rG   __classcell__r,   r,   rA   r-   r/   2   s(    r/   c                       s   e Zd Z										dded	ed
ededee dededeeeef  dedee	 de
de
deddf fddZdejdejdedejfddZ  ZS )Exaone4Attentionr   Nư>'      Fr0   r1   	num_headsnum_kv_headslayer_idhead_dimrms_norm_eps
rope_thetarope_scalingmax_position_embeddingsr4   r5   bias_o_projr6   r7   c                    s  t    || _t }t }t }|| _| j| dksJ | j| | _|| _| j|kr5| j| dks4J n	|| j dks>J t	d| j| | _
|pM|| j | _| j| j | _| j
| j | _| jd | _|| _|
| _t|| j| j| j||td|||d	| _t| j| j |||td|||d| _d}|d ur| d	krd
}t|}t|dd | _d
| _| jr|d t| j dkrd| _t| j| j|
||	|d| _t| j| j| j| j
|| jr|nd |td|d| _t | j|d| _!t | j|d| _"d S )Nr   r)   g      qkv_proj)	r1   	head_sizetotal_num_headstotal_num_kv_headsr5   r4   r6   tp_ranktp_sizeo_proj)
input_sizeoutput_sizer5   r4   r6   r`   ra   TggufFsliding_window_pattern)
rotary_dimmax_positionbaserY   is_neox_styleattn)rT   rU   sliding_window_sizer4   r6   eps)#r<   r=   r1   r   r   r   r^   rS   r_   maxrT   rV   q_sizekv_sizescalingrX   rZ   r   r$   r\   r   rb   get_namer.   r*   rf   
is_slidinglenr   
rotary_embr   rk   r   q_normk_norm)r@   r+   r1   rS   rT   rU   rV   rW   rX   rY   rZ   r4   r5   r[   r6   ra   attn_tp_rankattn_tp_sizerj   interleaved_sliding_windowrA   r,   r-   r=   Z   s   



zExaone4Attention.__init__	positionshidden_statesforward_batchc                 C   s   |  |\}}|j| j| j| jgdd\}}}|j}	|d| j}| |}||	}|j}
|d| j}| |}||
}| j	rE| j
rN| |||\}}| ||||}| |\}}|S )Ndim)r\   splitrp   rq   shapereshaperV   rw   rx   rf   rt   rv   rk   rb   )r@   r|   r}   r~   qkvrF   qkvq_shapek_shapeattn_outputoutputr,   r,   r-   rG      s    



zExaone4Attention.forward)
r   NrP   rQ   NrR   NFFr0   )rH   rI   rJ   rK   r   floatdictrL   r   r   rM   r=   torchTensorr   rG   rN   r,   r,   rA   r-   rO   Y   sb    	
grO   c                       sr   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )Exaone4DecoderLayerr   Nr0   r+   rU   r4   r6   r7   c                    s   t    || _|j| _t|dd}t|dd }|d ur't|dd r'|j|d< t|dd}t | _t | _	t
 | _t|| j|jt|d|j|||||td|d	
| _t| j|j|j|td
|d| _t| j|jd| _t| j|jd| _d S )NrX   i@B rY    original_max_position_embeddingsrZ   rR   num_key_value_heads	self_attn)
r+   r1   rS   rT   rU   rX   rY   rZ   r4   r6   mlp)r1   r2   r3   r4   r6   rm   )r<   r=   rU   r1   r*   r   r   local_dp_sizer   rz   r   ry   rO   num_attention_headsr   r$   r   r/   r2   r3   r   r   rW   post_attention_layernormpost_feedforward_layernorm)r@   r+   rU   r4   r6   rX   rY   rZ   rA   r,   r-   r=      sR   

zExaone4DecoderLayer.__init__r|   r}   r~   residualc                 C   sZ   |d u r|}| j |||d}| |}|| }|}| |}| |}|| }|}||fS )N)r|   r}   r~   )r   r   r   r   )r@   r|   r}   r~   r   r,   r,   r-   rG     s   


zExaone4DecoderLayer.forward)r   Nr0   )rH   rI   rJ   r	   rK   r   r   rL   r=   r   r   r   tuplerG   rN   r,   r,   rA   r-   r      s4    6r   c                       s   e Zd Z		ddee def fddZdejdejfd	d
Z			ddejdejde
deej dee deejeejeej f ef fddZ  ZS )Exaone4ModelNr0   r4   r6   c                    s   t     | _| _ j| _t | _| jjr&t j j	t
d|d| _nt | _t j fdd| jj| jjt
d|d\| _| _| _| jjrUt j	 jd| _d S tdd	| _d S )
Nembed_tokensr4   r6   c                    s   t  | |dS )N)r+   r4   rU   r6   )r   )idxr6   r+   r4   r,   r-   <lambda>M  s    z'Exaone4Model.__init__.<locals>.<lambda>layers)pp_rankpp_sizer6   rm   T)return_tuple)r<   r=   r+   r4   
vocab_sizer
   pp_groupis_first_rankr   r1   r$   r   r   r%   num_hidden_layersrank_in_group
world_sizer   start_layer	end_layeris_last_rankr   rW   normr@   r+   r4   r6   rA   r   r-   r=   6  s.   

zExaone4Model.__init__	input_idsr7   c                 C   s
   |  |S rC   )r   )r@   r   r,   r,   r-   get_input_embeddings\     
z!Exaone4Model.get_input_embeddingsr|   r~   input_embedspp_proxy_tensorsc           
      C   s   | j jr|d u r| |}n|}d }n|d usJ |d }|d }tt| jD ]}| j| }	|	||||\}}q(| j jsDt||dS | |}|S )Nr}   r   )r}   r   )	r   r   r   rangeru   r   r   r    r   )
r@   r   r|   r~   r   r   r}   r   ilayerr,   r,   r-   rG   _  s0   


zExaone4Model.forwardNr0   )NN)rH   rI   rJ   r   r   rL   r=   r   r   r   r   r    r   r   r   rG   rN   r,   r,   rA   r-   r   5  s0    &r   c                       s  e Zd ZdgZddiZddgdgfiZdZg dZdd	d
dddZg dddgdZ			dDde
e def fddZ		dDde
e defddZdejfddZe 			dEdejdejd ed!e
ej d"ed#e
e defd$d%Ze 	dFdejdejd ed&eeef d!ejf
d'd(Zed)d* Zed+d, Zd-d. Z d/e!eeejf  fd0d1Z"	3dGd4ed5ed6ede
ej fd7d8Z#d9d: Z$d;d< Z%d=d> Z&d?d@ Z'dAeddfdBdCZ(  Z)S )HExaone4ForCausalLMlm_head.weightlm_headcolwise_repr}   logitslanguage_model)z.gate_proj.z.down_proj.z	.up_proj.z.q_proj.z.k_proj.z.v_proj.z.o_proj.)	.qkv_projr   )r   r)   )r   r8   ).gate_up_projr   )r   r)   ).q_proj.k_proj.v_proj
.gate_proj.up_proj)q_projk_projv_proj	gate_projup_proj)r\   r9   Nr0   r4   r6   c                    s   t    t | _|| _|| _| ||td|| _|j	r#| jj
| _nt|j|j|td|t jd| _t|| _ttjdd| _d S )Nmodelr   )r4   r6   use_attn_tp_groupT)pooling_type	normalize)r<   r=   r
   r   r+   r4   _init_modelr$   r   tie_word_embeddingsr   r   r   r   r1   r#   enable_dp_lm_headr   logits_processorr   r   LASTpoolerr   rA   r,   r-   r=     s    

zExaone4ForCausalLM.__init__c                 C   s   t |||dS )Nr   )r   r   r,   r,   r-   r     s   zExaone4ForCausalLM._init_modelr7   c                 C      | j jS rC   )r   r   r@   r,   r,   r-   r     s   z'Exaone4ForCausalLM.get_input_embeddingsFr   r|   r~   r   get_embeddingr   c                 C   sB   | j |||||d}| jjr|s| ||| j|S | ||S |S )N)r   )r   r   r   r   r   r   )r@   r   r|   r~   r   r   r   r}   r,   r,   r-   rG     s"   
zExaone4ForCausalLM.forwardsplit_intervalc                 C   s   |\}}|dkr|d u r| j ||_n||_t||D ]}| j j| }	|	||j||j\|_|_q|| j jjkrS| j |j|j\}
}|
|_| 	||j| j
|}|S d }|S )Nr   )r   r   r}   r   r   r   r+   r   r   r   r   )r@   r   r|   r~   r   r   startendr   r   r}   rF   resultr,   r,   r-   forward_split_prefill  s0   	z(Exaone4ForCausalLM.forward_split_prefillc                 C   r   rC   )r   r   r   r,   r,   r-   r        zExaone4ForCausalLM.start_layerc                 C   r   rC   )r   r   r   r,   r,   r-   r     r   zExaone4ForCausalLM.end_layerc                 C   s
   t | jS rC   )r.   r+   r   r,   r,   r-   r.   !  r   z4Exaone4ForCausalLM.get_attention_sliding_window_sizeweightsc                 C   s  g d}t |  }|D ]\}}t|}|d ur+t| jdr+|| jjk s*|| jjkr+qd|v s3d|v r4qd|v s<d|v r=q|drG||vrGq| jj	rPd|v rPqd	|v r^t
||}|d u r^q|D ]-\}}}	||vrjq`|||}|d
rz||vrzq`||vrq`|| }
|
j}||
||	  n3|d
r||vrq|dr||vrq|| v r|| }
t|
dt}||
| qtd| d qd S )N))r   r   r   )r   r   r   )r   r   r   )r   r   r   )r   r   r)   r   zrotary_emb.inv_freq	projectorzrotary_emb.cos_cachedzrotary_emb.sin_cachedzmodel.vision_towerr   scalez.biasz	.kv_scaleweight_loaderz
Parameter z not found in params_dict)r   named_parametersr   hasattrr   r   r   
startswithr+   r   r"   replaceendswithr   keysr*   r!   r'   warning)r@   r   stacked_params_mappingparams_dictnameloaded_weightrU   
param_nameweight_nameshard_idparamr   r,   r,   r-   load_weights$  s^   	

zExaone4ForCausalLM.load_weightsd   r)   r   truncate_sizera   c              	      s  z|dkr"| j jr"td | jjj t	j
  d| W S |}d}| jD ]\}}}||v r<|||}|} nq)t|  }	|	| }
|dur|dv r| j j| }| j j| }| j j| j j }|dkrnd}|| }n|dkr{|| }|| }n|dkr|| | }|| }|
jd|| n-|d	v r| j j}|| }|dkrd}|}n|d
kr|}|}|
jd|| n|
j n|
j |d
krd|v sd|v r fddt|D }t	j|  t	j|d
d   t	j
  d| W S  ty   td| dt   Y dS w )zGet the weights of the parameter by its name. Similar to `get_parameter` in Hugging Face.

        Only used for unit test with an unoptimized performance.
        For optimized performance, please use torch.save and torch.load.
        r   zTword embedding is tied for this model, return embed_tokens.weight as lm_head.weight.N)r   r   r   r   r   r   r   )r   r)   r)   rb   r:   c                    s   g | ]}t  qS r,   )r   
zeros_like).0rF   weightr,   r-   
<listcomp>  s    z:Exaone4ForCausalLM.get_weights_by_name.<locals>.<listcomp>r   zError getting weights by name z in Exaone4ForCausalLM: )r+   r   r'   infor   r   r   cputor   float32numpytolistr   r   r   r   r   r   r1   datanarrowr2   r   distributed
all_gathercat	Exceptionerrorr&   )r@   r   r   ra   mapped_namemapped_shard_idr   r   r   r   r   rS   rT   rV   offsetsizer2   
slice_sizegathered_weightsr,   r   r-   get_weights_by_nameh  sx   


"z&Exaone4ForCausalLM.get_weights_by_namec                 C   s   | j jj| jjfS rC   )r   r   r   r   r   r,   r,   r-   get_embed_and_head  s   z%Exaone4ForCausalLM.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S rC   )r   r   r   r   r   cudaempty_cachesynchronize)r@   embedheadr,   r,   r-   set_embed_and_head  s   

z%Exaone4ForCausalLM.set_embed_and_headc                 C   s
   | j jjS rC   )r   r   r   r   r,   r,   r-   	get_embed  r   zExaone4ForCausalLM.get_embedc                 C   sJ   t | jdr| jj| jjkrd S | jj`|| jj_tj	  tj
  d S )Ntarget_hidden_size)r   r+   r  r1   r   r   r   r   r  r  r  )r@   r  r,   r,   r-   	set_embed  s   


zExaone4ForCausalLM.set_embedquantization_param_pathc                 C   s   | j | d S rC   )r   load_kv_cache_scales)r@   r  r,   r,   r-   r    s   z'Exaone4ForCausalLM.load_kv_cache_scalesr   )NFNrC   )r   r)   )*rH   rI   rJ   _tied_weights_keys_tp_plan_pp_planbase_model_prefix#default_bitsandbytes_target_modules#bitsandbytes_stacked_params_mappingpacked_modules_mappingr   r   rL   r=   r   r   	Embeddingr   r   no_gradr   r   rM   r    r   rG   r   rK   r   propertyr   r   r.   r   r   r  r  r  r  r  r  rN   r,   r,   rA   r-   r     s    
	

(

E
Ir   )Ccollections.abcr   typingr   r   r   r   r   r   r   transformersr	   sglang.srt.distributedr
   r   sglang.srt.layers.activationr   sglang.srt.layers.dp_attentionr   r   r   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   r   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   r    $sglang.srt.model_loader.weight_utilsr!   r"   sglang.srt.server_argsr#   sglang.srt.utilsr$   r%   sglang.utilsr&   r'   r.   Moduler/   rO   r   r   r   
EntryClassr,   r,   r,   r-   <module>   s@    ' YQ  K