"""Mistral adaptation of the LLaMA architecture."""

from collections.abc import Iterable

import torch
from torch import nn
from transformers import LlamaConfig

from vllm.compilation.decorators import support_torch_compile
from vllm.config import CacheConfig, VllmConfig
from vllm.model_executor.layers.activation import SiluAndMul
from vllm.model_executor.layers.linear import (ColumnParallelLinear,
                                               MergedColumnParallelLinear,
                                               RowParallelLinear)
from vllm.model_executor.layers.quantization import QuantizationConfig
from vllm.model_executor.models.llama import (LlamaAttention,
                                              LlamaDecoderLayer,
                                              LlamaForCausalLM, LlamaModel)
from vllm.sequence import IntermediateTensors
from vllm.v1.attention.backend import AttentionType

from .utils import AutoWeightsLoader


class MistralMLP(nn.Module):

    def __init__(
        self,
        hidden_size: int,
        intermediate_size: int,
        hidden_act: str,
        quant_config: QuantizationConfig | None = None,
        bias: bool = False,
        gate_up_proj_bias: bool | None = None,
        prefix: str = "",
        reduce_results: bool = True,
        disable_tp: bool = False,
    ) -> None:
        super().__init__()
        # The gate/up projection falls back to the shared bias setting when
        # no dedicated value is given.
        gate_up_proj_bias = bias if gate_up_proj_bias is None else gate_up_proj_bias
        self.gate_up_proj = MergedColumnParallelLinear(
            input_size=hidden_size,
            output_sizes=[intermediate_size] * 2,
            bias=gate_up_proj_bias,
            quant_config=quant_config,
            prefix=f"{prefix}.gate_up_proj",
            disable_tp=disable_tp,
        )
        self.down_proj = RowParallelLinear(
            input_size=intermediate_size,
            output_size=hidden_size,
            bias=bias,
            quant_config=quant_config,
            reduce_results=reduce_results,
            prefix=f"{prefix}.down_proj",
            disable_tp=disable_tp,
        )
        if hidden_act != "silu":
            raise ValueError(f"Unsupported activation: {hidden_act}. "
                             "Only silu is supported for now.")
        self.act_fn = SiluAndMul()

    def forward(self, x):
        x, _ = self.gate_up_proj(x)
        x = self.act_fn(x)
        x, _ = self.down_proj(x)
        return x


class MistralAttention(LlamaAttention):

    def __init__(
        self,
        config: LlamaConfig,
        hidden_size: int,
        num_heads: int,
        num_kv_heads: int,
        max_position_embeddings: int = 8192,
        quant_config: QuantizationConfig | None = None,
        bias: bool = False,
        bias_o_proj: bool = False,
        cache_config: CacheConfig | None = None,
        prefix: str = "",
        attn_type: str = AttentionType.DECODER,
    ) -> None:
        super().__init__(
            config=config,
            hidden_size=hidden_size,
            num_heads=num_heads,
            num_kv_heads=num_kv_heads,
            max_position_embeddings=max_position_embeddings,
            quant_config=quant_config,
            bias=bias,
            bias_o_proj=bias_o_proj,
            cache_config=cache_config,
            prefix=prefix,
            attn_type=attn_type,
        )
        llama_4_scaling_config = getattr(config, "llama_4_scaling", None)
        self.do_llama_4_scaling = llama_4_scaling_config is not None
        if self.do_llama_4_scaling:
            assert llama_4_scaling_config is not None
            self.llama_4_scaling_original_max_position_embeddings = (
                llama_4_scaling_config["original_max_position_embeddings"])
            self.llama_4_scaling_beta = llama_4_scaling_config["beta"]

    def _get_llama_4_attn_scale(self, positions: torch.Tensor) -> torch.Tensor:
        # Llama-4 style attention temperature tuning: the query scale grows
        # logarithmically once positions exceed the original context length.
        scaling = 1 + self.llama_4_scaling_beta * torch.log(
            1 + torch.floor(
                positions / self.llama_4_scaling_original_max_position_embeddings))
        return scaling.unsqueeze(-1)

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
    ) -> torch.Tensor:
        qkv, _ = self.qkv_proj(hidden_states)
        q, k, v = qkv.split([self.q_size, self.kv_size, self.kv_size], dim=-1)
        q, k = self.rotary_emb(positions, q, k)
        if self.do_llama_4_scaling:
            attn_scale = self._get_llama_4_attn_scale(positions)
            q = (q * attn_scale).to(q.dtype)
        attn_output = self.attn(q, k, v)
        output, _ = self.o_proj(attn_output)
        return output


class MistralDecoderLayer(LlamaDecoderLayer):

    def __init__(
        self,
        vllm_config: VllmConfig,
        prefix: str = "",
        config: LlamaConfig | None = None,
    ) -> None:
        super().__init__(
            vllm_config=vllm_config,
            prefix=prefix,
            config=config,
            attn_layer_type=MistralAttention,
        )
        self.layer_idx = int(prefix.split(".")[-1])
        config = config or vllm_config.model_config.hf_config
        if getattr(config, "ada_rms_norm_t_cond", False):
            # Adaptive RMSNorm conditioning: project the conditioning signal
            # to a per-channel scale applied after the post-attention norm.
            self.ada_rms_norm_t_cond = nn.Sequential(
                ColumnParallelLinear(
                    input_size=config.ada_rms_norm_t_cond_dim,
                    output_size=config.hidden_size,
                    bias=False,
                    return_bias=False,
                ),
                nn.GELU(),
                RowParallelLinear(
                    input_size=config.hidden_size,
                    output_size=config.hidden_size,
                    bias=False,
                    return_bias=False,
                ),
            )
        else:
            self.ada_rms_norm_t_cond = None

    def forward(
        self,
        positions: torch.Tensor,
        hidden_states: torch.Tensor,
        residual: torch.Tensor | None,
        t_cond: torch.Tensor | None = None,
    ) -> tuple[torch.Tensor, torch.Tensor]:
        if residual is None:
            residual = hidden_states
            hidden_states = self.input_layernorm(hidden_states)
        else:
            hidden_states, residual = self.input_layernorm(
                hidden_states, residual)
        hidden_states = self.self_attn(positions=positions,
                                       hidden_states=hidden_states)
        hidden_states, residual = self.post_attention_layernorm(
            hidden_states, residual)
        if self.ada_rms_norm_t_cond is not None:
            assert t_cond is not None
            hidden_states = hidden_states * (1 + self.ada_rms_norm_t_cond(t_cond))
        hidden_states = self.mlp(hidden_states)
        return hidden_states, residual


@support_torch_compile
class MistralModel(LlamaModel):

    def __init__(
        self,
        *,
        vllm_config: VllmConfig,
        prefix: str = "",
        layer_type: type[nn.Module] = MistralDecoderLayer,
    ):
        super().__init__(vllm_config=vllm_config,
                         prefix=prefix,
                         layer_type=layer_type)

    def forward(
        self,
        input_ids: torch.Tensor | None,
        positions: torch.Tensor,
        intermediate_tensors: IntermediateTensors | None = None,
        inputs_embeds: torch.Tensor | None = None,
        t_cond: torch.Tensor | None = None,
    ) -> torch.Tensor | IntermediateTensors | tuple[torch.Tensor,
                                                    list[torch.Tensor]]:
        return super().forward(input_ids, positions, intermediate_tensors,
                               inputs_embeds, t_cond=t_cond)


class MistralForCausalLM(LlamaForCausalLM):
    embedding_modules: dict[str, str] = {}

    # Mapping from Mistral-format checkpoint names to the names used by this
    # model (the HF Llama layout it inherits).
    mistral_mapping = {
        "layers": "model.layers",
        "attention": "self_attn",
        "qscale_act": "input_scale",
        "qscale_weight": "weight_scale",
        "kv_fake_quantizer.qscale_act": "kv_scale",
        "q_fake_quantizer.qscale_act": "attn.q_scale",
        "k_fake_quantizer.qscale_act": "k_scale",
        "v_fake_quantizer.qscale_act": "v_scale",
        "wq": "q_proj",
        "wk": "k_proj",
        "wv": "v_proj",
        "wo": "o_proj",
        "attention_norm": "input_layernorm",
        "feed_forward": "mlp",
        "w1": "gate_proj",
        "w2": "down_proj",
        "w3": "up_proj",
        "ffn_norm": "post_attention_layernorm",
        "tok_embeddings": "model.embed_tokens",
        "output": "lm_head",
        "norm": "model.norm",
    }

    def __init__(
        self,
        *,
        vllm_config: VllmConfig,
        prefix: str = "",
        layer_type: type[nn.Module] = MistralDecoderLayer,
    ):
        super().__init__(vllm_config=vllm_config,
                         prefix=prefix,
                         layer_type=layer_type)

    def _init_model(
        self,
        vllm_config: VllmConfig,
        prefix: str = "",
        layer_type: type[nn.Module] = MistralDecoderLayer,
    ):
        return MistralModel(vllm_config=vllm_config,
                            prefix=prefix,
                            layer_type=layer_type)

    def load_weights(self, weights: Iterable[tuple[str, torch.Tensor]]) -> set[str]:
        loader = AutoWeightsLoader(
            self,
            skip_prefixes=(["lm_head."]
                           if self.config.tie_word_embeddings else None),
        )
        return loader.load_weights(
            self.maybe_remap_mistral(name, loaded_weight)
            for name, loaded_weight in weights)

    def maybe_remap_mistral(
        self,
        name: str,
        loaded_weight: torch.Tensor,
    ) -> tuple[str, torch.Tensor]:
        # Remap weight names (and rotary layouts) from the Mistral checkpoint
        # format to the names used by this model.

        def permute(w: torch.Tensor, n_heads: int, attn_out: int):
            attn_in = self.config.head_dim * n_heads
            return (w.view(n_heads, attn_in // n_heads // 2, 2,
                           attn_out).transpose(1, 2).reshape(attn_in, attn_out))

        mapping = self.mistral_mapping
        modules = name.split(".")

        # q/k projections (and their per-channel weight scales) are stored
        # with a different rotary-embedding layout and must be permuted;
        # scalar scales are left untouched.
        if "wk" in modules and modules[-1] == "weight":
            loaded_weight = permute(loaded_weight,
                                    self.config.num_key_value_heads,
                                    self.config.hidden_size)
        elif ("wk" in modules and modules[-1] == "qscale_weight"
              and loaded_weight.numel() > 1):
            loaded_weight = permute(loaded_weight,
                                    self.config.num_key_value_heads, 1)
        elif "wq" in modules and modules[-1] == "weight":
            loaded_weight = permute(loaded_weight,
                                    self.config.num_attention_heads,
                                    self.config.hidden_size)
        elif ("wq" in modules and modules[-1] == "qscale_weight"
              and loaded_weight.numel() > 1):
            loaded_weight = permute(loaded_weight,
                                    self.config.num_attention_heads, 1)

        num_modules = len(modules)
        for i in range(num_modules):
            item = modules[i]
            next_item = modules[i + 1] if i < num_modules - 1 else None
            combined_item = (f"{item}.{next_item}"
                             if next_item is not None else None)

            if combined_item in mapping:
                name = name.replace(combined_item, mapping[combined_item])
            elif item in mapping and mapping[item] not in name:
                name = name.replace(item, mapping[item])

        return name, loaded_weight
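
# Illustrative trace (comment only, not executed) of the rename pass in
# `maybe_remap_mistral` on a hypothetical Mistral-format attention key:
#
#   "layers.0.attention.wq.weight"
#     -> "model.layers.0.attention.wq.weight"      ("layers"    -> "model.layers")
#     -> "model.layers.0.self_attn.wq.weight"      ("attention" -> "self_attn")
#     -> "model.layers.0.self_attn.q_proj.weight"  ("wq"        -> "q_proj")
#
# Because the last path component is "weight" and "wq" is present, the tensor
# itself is also permuted into the rotary-embedding layout expected by the
# q projection before it is handed to the weight loader.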