o
    iAI                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z,m-Z- ddl.m/Z/m0Z0m1Z1m2Z2m3Z3m4Z4m5Z5 G dd dej6Z7G dd dej6Z8G dd dej6Z9e
G dd dej6Z:G d d! d!ej6e,e-Z;dS )"z@Inference-only Exaone model compatible with HuggingFace weights.    )Iterable)isliceN)nn)Exaone4Config)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)	Attention)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors)set_default_rope_theta   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayerextract_layer_indexis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sT   e Zd Z				ddededededB d	ed
ededdf fddZdd Z  Z	S )Exaone4GatedMLPNTF hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsbiasprefixreturnc                    sj   t    t||gd ||| dd| _t|||||| dd| _|dkr/td| dt | _d S )	N   .gate_up_proj)
input_sizeoutput_sizesr+   r)   r,   z
.down_proj)r0   output_sizer+   r)   r*   r,   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr&   r'   r(   r)   r*   r+   r,   	__class__ X/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/exaone4.pyr5   E   s*   


zExaone4GatedMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r6   r9   r7   )r:   xgate_up_r=   r=   r>   forwarde   s   
zExaone4GatedMLP.forward)NTFr%   )
__name__
__module____qualname__intstrr   boolr5   rC   __classcell__r=   r=   r;   r>   r$   D   s.    	 r$   c                       sv   e Zd Z					ddedededed	ed
edB dededB deddf fddZ	de
jde
jde
jfddZ  ZS )Exaone4Attention    NFr%   configr&   	num_headsnum_kv_headsmax_position_embeddingsr)   r+   cache_configr,   r-   c
              
      s  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _t	|dd | _
| j
d u rT| j| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _t|| j
| j| j|||	 dd| _t| j| j
 ||||	 dd| _t| j
|jd	| _t| j
|jd	| _d
}|d ur| dkrd}t|	}|j| dk}|r|jnd | _d|jv| _t|dd t| j
||j|d| _t | j| j
| j| j||| j|	 dd| _!d S )Nr   r   head_dimg      	.qkv_proj)r&   	head_sizetotal_num_headstotal_num_kv_headsr+   r)   r,   z.o_proj)r0   r2   r+   r)   r,   epsTggufFsliding_attentioni@B )default_theta)max_positionrope_parametersis_neox_stylez.attn)rO   rQ   r)   per_layer_sliding_windowr,   )"r4   r5   r&   r
   rU   rN   rV   maxrO   getattrrR   q_sizekv_sizescalingrP   r   qkv_projr   o_projr   rms_norm_epsq_normk_normget_namer   layer_typessliding_windowapply_rope_all_layersr   r   r]   
rotary_embr   attn)r:   rM   r&   rN   rO   rP   r)   r+   rQ   r,   tp_sizer^   	layer_idx
is_slidingr;   r=   r>   r5   m   sx   




zExaone4Attention.__init__	positionshidden_statesc           
      C   s   |  |\}}|j| j| j| jgdd\}}}|d| j| jf}| |}|dd}|d| j	| jf}| 
|}|dd}| jsG| jrP| |||\}}| |||}| |\}	}|	S )N)dim)re   splitrb   rc   	unflattenrN   rR   rh   flattenrO   ri   rl   rm   rn   ro   rf   )
r:   rs   rt   qkvrB   qkvattn_outputoutputr=   r=   r>   rC      s    

zExaone4Attention.forward)rL   NFNr%   )rD   rE   rF   r   rG   r   rI   r   rH   r5   torchTensorrC   rJ   r=   r=   r;   r>   rK   l   sD    	
WrK   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )Exaone4DecoderLayerNr%   rM   rQ   r)   r,   r-   c                    s   t    |j| _t|dd}t|ddpt|dd}t|| j|jt|d|j||||| dd	| _t| j|j|j	|t|d	d| d
d| _
t|j|jd| _t|j|jd| _d S )NrP   rL   attention_biasFr+   num_key_value_headsz
.self_attn)	rM   r&   rN   rO   rP   r)   r+   rQ   r,   mlp_biasz.mlp)r&   r'   r(   r)   r+   r,   rW   )r4   r5   r&   ra   rK   num_attention_heads	self_attnr$   r'   r(   mlpr   rg   post_attention_layernormpost_feedforward_layernorm)r:   rM   rQ   r)   r,   rP   r   r;   r=   r>   r5      sB   

zExaone4DecoderLayer.__init__rs   rt   residualc                 C   sL   |}| j ||d}| |}|| }|}| |}| |}|| }||fS )N)rs   rt   )r   r   r   r   )r:   rs   rt   r   r=   r=   r>   rC     s   


zExaone4DecoderLayer.forward)NNr%   )rD   rE   rF   r   r   r   rH   r5   r   r   tuplerC   rJ   r=   r=   r;   r>   r      s0    ,r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )Exaone4Modelr%   r,   vllm_configr,   c                   s   t    |jj|j |j| _| _j| _t j	s$j
r/t jr/t| jjd| _nt | _tj fdd| dd\| _| _| _t jrXtjjd| _nt | _tddgj| _d S )	N)r)   c                    s   t  | dS )N)rM   rQ   r)   r,   )r   r   rQ   rM   r)   r=   r>   <lambda>>  s    z'Exaone4Model.__init__.<locals>.<lambda>z.layersr   rW   rt   r   )r4   r5   model_config	hf_configrQ   r)   rM   
vocab_sizer	   is_first_ranktie_word_embeddingsis_last_rankr   r&   embed_tokensr   r"   num_hidden_layersstart_layer	end_layerlayersr   rg   normr!   make_empty_intermediate_tensors)r:   r   r,   r;   r   r>   r5   '  s:   




zExaone4Model.__init__	input_idsr-   c                 C   s
   |  |S r?   )r   r:   r   r=   r=   r>   embed_input_idsO  s   
zExaone4Model.embed_input_idsNrs   intermediate_tensorsinputs_embedsc                 C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	|}|S )Nrt   r   )rt   r   )
r	   r   r   r   r   r   r   r   r   r   )r:   r   rs   r   r   rt   r   layerr=   r=   r>   rC   R  s(   


zExaone4Model.forwardweightsc                 C   sb  g d}t |  }t }|D ]\}}d|v rqd|v s d|v r!q| jd urO| j| }rO|| }t|dt}	| dkr@|n|d }|	|| || q|D ].\}
}}||vr[qQ|	||
}|
drk||vrkqQt|| rqqQ|| }|j}	|	|||  n)|
dr||vrqt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )N))rS   z.q_projr|   )rS   z.k_projr}   )rS   z.v_projr~   )r/   z
.gate_projr   )r/   z.up_projr   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   z.bias)dictnamed_parameterssetr)   get_cache_scalera   r   rv   addreplaceendswithr    r   r   )r:   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr=   r=   r>   load_weightss  sT   






zExaone4Model.load_weightsr?   )rD   rE   rF   r   rH   r5   r   r   r   r   rC   r   r   r   r   rJ   r=   r=   r;   r>   r   %  s     (
,!r   c                       s   e Zd Zg dddgdZdddZdd	d
edef fddZdej	dej	fddZ
		ddej	dB dej	dedB dej	dB dej	eB f
ddZdej	dej	dB fddZdeeeej	f  dee fddZ  ZS )Exaone4ForCausalLM)q_projk_projv_proj	gate_projup_proj)re   r6   input_embeddingsoutput_embeddings)r   lm_headr%   r   r   r,   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
rGt|j|j|t|dd| _|jr8| jjj| j_t|dd}t|j|d| _nt | _| jj| _d S )Nmodel)r   r,   r   )r)   r,   logit_scaleg      ?)scale)r4   r5   r   r   r)   rM   r   r#   r   r	   r   r   r   r&   r   r   r   weightra   r   logits_processorr   r   )r:   r   r,   rM   r)   r   r;   r=   r>   r5     s2   

zExaone4ForCausalLM.__init__r   r-   c                 C   s   | j |S r?   )r   r   r   r=   r=   r>   r     s   z"Exaone4ForCausalLM.embed_input_idsNrs   r   r   c                 C   s   |  ||||}|S r?   )r   )r:   r   rs   r   r   model_outputr=   r=   r>   rC     s   zExaone4ForCausalLM.forwardrt   c                 C   s   |  | j|}|S r?   )r   r   )r:   rt   logitsr=   r=   r>   compute_logits  s   z!Exaone4ForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rM   r   r   )r:   r   loaderr=   r=   r>   r     s
   
zExaone4ForCausalLM.load_weights)NN)rD   rE   rF   packed_modules_mappingembedding_modulesr   rH   r5   r   r   r   r   rC   r   r   r   r   r   rJ   r=   r=   r;   r>   r     s<    !

,r   )<__doc__collections.abcr   	itertoolsr   r   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   vllm.transformers_utils.configr   
interfacesr   r   utilsr   r   r   r    r!   r"   r#   Moduler$   rK   r   r   r   r=   r=   r=   r>   <module>   s:   $(nK 