o
    -iyI                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z%m&Z& ddl'm(Z( ddl)m*Z*m+Z+ ddl,m-Z-m.Z.m/Z/m0Z0m1Z1m2Z2 G dd dej3Z4G dd dej3Z5G dd dej3Z6G dd dej3Z7eG dd  d ej3Z8G d!d" d"ej3e*e+Z9dS )#z@Inference-only Exaone model compatible with HuggingFace weights.    )Iterable)isliceN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderPPMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sN   e Zd Z			ddededededB ded	ed
df fddZdd Z  Z	S )ExaoneGatedMLPNF hidden_sizeintermediate_size
hidden_actquant_configbiasprefixreturnc                    sh   t    t||gd ||| dd| _t||||| dd| _|dkr.td| dt | _d S )	N   .gate_up_proj)
input_sizeoutput_sizesr(   r'   r)   z.c_projr-   output_sizer(   r'   r)   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   c_proj
ValueErrorr   act_fn)selfr$   r%   r&   r'   r(   r)   	__class__ ^/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/exaone.pyr3   G   s(   
	
zExaoneGatedMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r4   r7   r5   )r8   xgate_up_r;   r;   r<   forwarde   s   
zExaoneGatedMLP.forward)NFr#   )
__name__
__module____qualname__intstrr   boolr3   rA   __classcell__r;   r;   r9   r<   r"   F   s(    r"   c                       v   e Zd Z					ddedededed	ed
edB dededB deddf fddZ	de
jde
jde
jfddZ  ZS )ExaoneAttention    NFr#   configr$   	num_headsnum_kv_headsmax_position_embeddingsr'   r(   cache_configr)   r*   c
              	      sx  t    || _t }
|| _| j|
 dksJ | j|
 | _|| _| j|
kr/| j|
 dks.J n	|
| j dks8J td| j|
 | _t	|dd | _
| j
d u rT| j| j | _
| j| j
 | _| j| j
 | _| j
d | _|| _t|| j
| j| j|||	 dd| _t| j| j
 ||||	 dd| _d	}|d ur| d
krd}t| j
||j|d| _t| j| j
| j| j|||	 dd| _d S )Nr   r   head_dimg      	.qkv_proj)r$   	head_sizetotal_num_headstotal_num_kv_headsr(   r'   r)   z	.out_projr/   TggufF)max_positionrope_parametersis_neox_style.attn)rN   rP   r'   r)   )r2   r3   r$   r   rT   rM   rU   maxrN   getattrrQ   q_sizekv_sizescalingrO   r   qkv_projr   out_projget_namer   rX   
rotary_embr   attn)r8   rL   r$   rM   rN   rO   r'   r(   rP   r)   tp_sizerY   r9   r;   r<   r3   m   sh   




zExaoneAttention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)r`   splitr]   r^   rc   rd   ra   )
r8   rf   rg   qkvr@   qkvattn_outputoutputr;   r;   r<   rA      s    zExaoneAttention.forwardrK   NFNr#   rB   rC   rD   r   rE   r   rG   r   rF   r3   torchTensorrA   rH   r;   r;   r9   r<   rJ   l   sD    	
KrJ   c                       rI   )ExaoneBlockAttentionrK   NFr#   rL   r$   rM   rN   rO   r'   r(   rP   r)   r*   c
           
         s0   t    t|||||||||	 dd	| _d S )Nz
.attention	rL   r$   rM   rN   rO   r'   r(   rP   r)   )r2   r3   rJ   	attention)
r8   rL   r$   rM   rN   rO   r'   r(   rP   r)   r9   r;   r<   r3      s   
zExaoneBlockAttention.__init__rf   rg   c                 C   s   | j ||dS N)rf   rg   )rw   )r8   rf   rg   r;   r;   r<   rA      s   zExaoneBlockAttention.forwardrq   rr   r;   r;   r9   r<   ru      sD    	
ru   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )ExaoneDecoderLayerNr#   rL   rP   r'   r)   r*   c                    s   t    |j| _t|dd}t|ddpt|dd}t|| j|jt|d|j||||| dd	| _t| j|j|j	|t|d	d| d
d| _
t|j|jd| _t|j|jd| _d S )NrO   rK   attention_biasFr(   num_key_value_headsrZ   rv   mlp_biasz.mlp)r$   r%   r&   r'   r(   r)   eps)r2   r3   r$   r\   ru   num_attention_headsrd   r"   r%   activation_functionmlpr   layer_norm_epsilonln_1ln_2)r8   rL   rP   r'   r)   rO   rz   r9   r;   r<   r3      s:   

zExaoneDecoderLayer.__init__rf   rg   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS rx   )r   rd   r   r   )r8   rf   rg   r   r;   r;   r<   rA     s   
zExaoneDecoderLayer.forward)NNr#   )rB   rC   rD   r   r   r   rF   r3   rs   rt   tuplerA   rH   r;   r;   r9   r<   ry      s0    'ry   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )ExaoneModelr#   r)   vllm_configr)   c                   s   t    |jj|j |j| _| _j| _j| _t	 j
s(jr3t	 jr3t| jjd| _nt | _tj fdd| dd\| _| _| _t	 jr\tjjd| _nt | _tddgj| _d S )	N)r'   c                    s   t  | dS )N)rL   rP   r'   r)   )ry   r   rP   rL   r'   r;   r<   <lambda>C  s    z&ExaoneModel.__init__.<locals>.<lambda>z.hr   r}   rg   r   )r2   r3   model_config	hf_configrP   r'   rL   
vocab_sizewter
   is_first_ranktie_word_embeddingsis_last_rankr   r$   r   r    num_hidden_layersstart_layer	end_layerhr   r   ln_fr   make_empty_intermediate_tensors)r8   r   r)   r9   r   r<   r3   +  s<   




zExaoneModel.__init__	input_idsr*   c                 C   s
   |  |S r=   )r   r8   r   r;   r;   r<   embed_input_idsT  s   
zExaoneModel.embed_input_idsNrf   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nrg   r   )rg   r   )
r
   r   r   r   r   r   r   r   r   r   )	r8   r   rf   r   r   rg   r   layerr@   r;   r;   r<   rA   W  s(   

zExaoneModel.forwardweightsc                 C   sb  g d}t |  }t }|D ]\}}d|v rqd|v s d|v r!q| jd urO| j| }rO|| }t|dt}	| dkr@|n|d }|	|| || q|D ].\}
}}||vr[qQ|	||
}|
drk||vrkqQt|| rqqQ|| }|j}	|	|||  n)|
dr||vrqt||}|d u rqt|| rq|| }t|dt}	|	|| || q|S )N))rR   z.q_projrl   )rR   z.k_projrm   )rR   z.v_projrn   )r,   z.c_fc_0r   )r,   z.c_fc_1r   zrotary_emb.inv_freqzrotary_emb.cos_cachedzrotary_emb.sin_cachedweight_loaderr   z.bias)dictnamed_parameterssetr'   get_cache_scaler\   r   ri   addreplaceendswithr   r   r   )r8   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
scale_nameparamr   
param_nameweight_nameshard_idr;   r;   r<   load_weightsx  sT   






zExaoneModel.load_weightsr=   )rB   rC   rD   r	   rF   r3   rs   rt   r   r   rA   r   r   r   r   rH   r;   r;   r9   r<   r   )  s     )
,!r   c                       s   e Zd Zg dddgdZdddZdd	d
edef fddZdej	dej	fddZ
		ddej	dej	dedB dej	dB dej	eB f
ddZdej	dej	dB fddZdeeeej	f  dee fddZ  ZS )ExaoneForCausalLM)q_projk_projv_projc_fc_0c_fc_1)r`   r4   input_embeddingsoutput_embeddings)r   lm_headr#   r   r   r)   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	 j
rGt|j|j|t|dd| _|jr8| jjj| j_t|dd}t|j|d| _nt | _| jj| _d S )Nmodel)r   r)   r   )r'   r)   logit_scaleg      ?)scale)r2   r3   r   r   r'   rL   r   r!   transformerr
   r   r   r   r$   r   r   r   weightr\   r   logits_processorr   r   )r8   r   r)   rL   r'   r   r9   r;   r<   r3     s2   

zExaoneForCausalLM.__init__r   r*   c                 C   s   | j |S r=   )r   r   r   r;   r;   r<   r     s   z!ExaoneForCausalLM.embed_input_idsNrf   r   r   c                 C   s   |  ||||}|S r=   )r   )r8   r   rf   r   r   model_outputr;   r;   r<   rA     s   zExaoneForCausalLM.forwardrg   c                 C   s   |  | j|}|S r=   )r   r   )r8   rg   logitsr;   r;   r<   compute_logits  s   z ExaoneForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.)skip_prefixes)r   rL   r   r   )r8   r   loaderr;   r;   r<   r     s
   
zExaoneForCausalLM.load_weights)NN)rB   rC   rD   packed_modules_mappingembedding_modulesr	   rF   r3   rs   rt   r   r   rA   r   r   r   r   r   rH   r;   r;   r9   r<   r     s<    "

,r   ):__doc__collections.abcr   	itertoolsr   rs   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r    r!   Moduler"   rJ   ru   ry   r   r   r;   r;   r;   r<   <module>   s:    
&Y%? 