o
    -iW/                     @   s|  d dl Z d dlmZ d dlmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZ d dlmZmZ d dlmZmZmZ d d	lmZ d d
lmZmZmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z" d dl#m$Z$ ddl%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, de-de-dej.fddZ/G dd dej0Z1G dd dej0Z2G dd dej0Z3eG dd dej0Z4G d d! d!ej0e&Z5dS )"    N)Iterable)islice)	MptConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)VocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsalibi_bias_maxreturnc                 C   s~   dt t |  }tjd|d tjd}||| }dtd| }|| kr=t|dd d |d d d gd |  }|S )N   r   )dtype      ?)	mathceillog2torcharangefloat32mulpowconcat)r   r   next_power_of_2mslopes r.   [/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/mpt.py_get_alibi_slopes+   s   *r0   c                	       Z   e Zd Z			ddededB dedB def fddZd	ej	d
ej	dej	fddZ
  ZS )MPTAttentionN configcache_configquant_configprefixc              
      s  t    |j| _|j| _| j| j | _|jj| _|jj| _|jj	| _	d|jv r.|jj
| _n| j| _|jjr8J |jjs>J t| j| j| j | j| j|j || dd| _| jrgt| j| _t| j| _t| j| j|j || dd| _t }| j| dksJ | j| | _| j|kr| j| dksJ n	|| j dksJ td| j| | _| j| j | _| j| j | _t }|| j }|d | j }t| j| j	}	|	||  }	| j| j | _| jd }
t | j| j|
|	| j||| dd	| _!d S )
N
kv_n_headsz.Wqkvbiasr6   r7   z	.out_projr   r   g      .attn)alibi_slopesnum_kv_headsr5   r6   r7   )"super__init__d_modeln_headsr   head_dimattn_configclip_qkvqk_lnr   r8   total_num_kv_heads	prefix_lmalibir   no_biasWqkvnn	LayerNormq_lnk_lnr   out_projr   	num_headsmaxr=   q_sizekv_sizer
   r0   tolistr   attn)selfr4   r5   r6   r7   tp_world_sizetp_rank
head_starthead_endr<   scaling	__class__r.   r/   r?   9   sr   





	


zMPTAttention.__init__position_idshidden_statesr   c           
      C   s   ~|  |\}}| jd ur|j| j | jd |j| j| j| jgdd\}}}| jr4| |}| |}| 	|||}| 
|\}	}|	S )N)minrQ   )dim)rJ   rD   clamp_splitrR   rS   rE   rM   rN   rU   rO   )
rV   r^   r_   qkv_qkvattn_outputoutputr.   r.   r/   forward   s   
 

zMPTAttention.forwardNNr3   __name__
__module____qualname__r   r   r   strr?   r%   Tensorrl   __classcell__r.   r.   r\   r/   r2   8   s(    Mr2   c                       sJ   e Zd Z		ddededB def fddZdejd	ejfd
dZ	  Z
S )MPTMLPNr3   r4   r6   r7   c                    sh   t    |j}|j}|| }t|||j || dd| _td| _t	|||j || dd| _
d S )Nz.up_projr9   geluz
.down_proj)r>   r?   r@   expansion_ratior   rI   up_projr   actr   	down_proj)rV   r4   r6   r7   hidden_sizerw   intermediate_sizer\   r.   r/   r?      s&   

zMPTMLP.__init__xr   c                 C   s*   |  |\}}| |}| |\}}|S N)rx   ry   rz   )rV   r}   rf   r.   r.   r/   rl      s   
zMPTMLP.forward)Nr3   )ro   rp   rq   r   r   rr   r?   r%   rs   rl   rt   r.   r.   r\   r/   ru      s    ru   c                	       r1   )MPTBlockNr3   r4   r5   r6   r7   c                    sZ   t    |j}t|| _t|||| dd| _t|| _t	||| dd| _
d S )Nr;   r7   z.ffn)r>   r?   r@   rK   rL   norm_1r2   rU   norm_2ru   ffn)rV   r4   r5   r6   r7   r{   r\   r.   r/   r?      s   
zMPTBlock.__init__r^   r_   r   c                 C   s@   |  |}| j||d}|| }| |}| |}|| }|S )N)r^   r_   )r   rU   r   r   )rV   r^   r_   r}   r.   r.   r/   rl      s   


zMPTBlock.forwardrm   rn   r.   r.   r\   r/   r      s(    r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )MPTModelr3   r   vllm_configr7   c                   s   t    |jj|j |jjdksJ jdksJ tj	j
| _tj fdd| dd\| _| _| _tj
| _jr_|  D ]}t|dr^t|jtjr^|dd  qJtdgj
| _d S )	Nr!   low_precision_layernormc                    s   t  | dS )Nr   )r   r   r5   r4   r6   r.   r/   <lambda>   s    z#MPTModel.__init__.<locals>.<lambda>z.blocksr   r:   r_   )r>   r?   model_config	hf_configr5   r6   embedding_fraction	norm_typer   
vocab_sizer@   wter   n_layersstart_layer	end_layerblocksrK   rL   norm_frI   moduleshasattr
isinstancer:   	Parameterregister_parameterr   make_empty_intermediate_tensors)rV   r   r7   moduler\   r   r/   r?      s0   

zMPTModel.__init__	input_idsr   c                 C   s
   |  |S r~   )r   rV   r   r.   r.   r/   embed_input_ids   s   
zMPTModel.embed_input_idsNr^   intermediate_tensorsinputs_embedsc                 C   sz   t  jr|d ur|}n| |}n
|d usJ |d }t| j| j| jD ]}|||}q$t  js6td|iS | 	|}|S )Nr_   )
r	   is_first_rankr   r   r   r   r   is_last_rankr   r   )rV   r   r^   r   r   r_   blockr.   r.   r/   rl      s   
zMPTModel.forwardweightsc                 C   sp   t | jdd}t }|D ](\}}|dr||vrqt|| r!q|| }t|dt}||| || q|S )NF)remove_duplicatez.biasweight_loader)dictnamed_parameterssetendswithr   getattrr   add)rV   r   params_dictloaded_paramsnameloaded_weightparamr   r.   r.   r/   load_weights  s   

zMPTModel.load_weightsr~   )ro   rp   rq   r   rr   r?   r%   rs   r   r   rl   r   tupler   r   rt   r.   r.   r\   r/   r      s     
,r   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )MPTForCausalLMr3   r   r   r7   c                   sf   t    |jj}|j}|| _|jsJ || _t|t|dd| _	| j	j
| _t|j| _| j	j| _d S )Ntransformer)r   r7   )r>   r?   r   r   r6   r4   tie_word_embeddingsr   r   r   r   lm_headr   r   logits_processorr   )rV   r   r7   r4   r6   r\   r.   r/   r?   &  s   



zMPTForCausalLM.__init__r   r   c                 C   s   | j |S r~   )r   r   r   r.   r.   r/   r   7  s   zMPTForCausalLM.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S r~   )r   )rV   r   r   r   r   r_   r.   r.   r/   rl   :  s   zMPTForCausalLM.forwardr_   c                 C   s   |  | j|}|S r~   )r   r   )rV   r_   logitsr.   r.   r/   compute_logitsF  s   zMPTForCausalLM.compute_logitsr   c                 C   s   t | }||S r~   )r   r   )rV   r   loaderr.   r.   r/   r   M  s   
zMPTForCausalLM.load_weights)NN)ro   rp   rq   r   rr   r?   r%   rs   r   r   rl   r   r   r   r   r   rt   r.   r.   r\   r/   r   %  s,    

,r   )6r"   collections.abcr   	itertoolsr   r%   torch.nnrK   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   intrs   r0   Moduler2   ru   r   r   r   r.   r.   r.   r/   <module>   s@   	
`""H