o
    -i?                     @   s  d dl mZ d dlmZ d dlmZ d dlmZ d dlZd dlm	Z	 d dl
mZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZmZmZmZ d dlmZ d dlmZ d dlmZmZm Z  d dl!m"Z" d dl#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ d dl,m-Z- d dl.m/Z/ ddl0m1Z1m2Z2 ddl3m4Z4 ddl5m6Z6m7Z7m8Z8m9Z9m:Z:m;Z; G dd de	j<Z=G dd de	j<Z>G dd de	j<Z?eG d d! d!e	j<Z@G d"d# d#e	j<e2e1ZAe4d$d%G d&d' d'eAZBdS )(    )Iterable)partial)islice)AnyN)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_sizesplit_tensor_along_last_dim tensor_model_parallel_all_gather)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)pooler_for_token_classify)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)default_pooling_type)StageMissingLayeris_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixno_init_weightsc                       sH   e Zd Z		ddededededB deddf fd	d
Zdd Z  ZS )InternLM2MLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|| dd| _t||d|| dd| _|dkr.td| dt | _d S )	N   Fz.gate_up_projbiasr-   r.   z.w2siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   w2
ValueErrorr   act_fn)selfr*   r+   r,   r-   r.   	__class__ a/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/internlm2.pyr5   6   s(   

zInternLM2MLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r6   r9   r7   )r:   xgate_up_r=   r=   r>   forwardS   s   
zInternLM2MLP.forward)Nr)   )	__name__
__module____qualname__intstrr   r5   rC   __classcell__r=   r=   r;   r>   r(   5   s"    r(   c                       s   e Zd Z					ddedededeeef dB ded	edB d
edB deddf fddZ	de
jfddZde
jde
jde
jfddZ  ZS )InternLM2AttentionN    r)   r*   	num_headsnum_kv_headsrope_parametersmax_position_embeddingscache_configr-   r.   r/   c	           	   	      sf  t    || _t | _t | _|| _| j| j dksJ | j| j | _|| _	| j	| jkr8| j	| j dks7J n
| j| j	 dksBJ t
d| j	| j | _|| j | _| j| j | _| j| j | _t| j| j | _| jd | _|| _t|| j| j| j	d|| dd| _t| j| j |d|| dd| _t| j||d| _t| j| j| j| j||| d	d
| _d S )Nr   r   g      Fz.wqkvr1   z.wo)max_positionrN   z.attn)rM   rP   r-   r.   )r4   r5   r*   r   tp_sizer   tp_ranktotal_num_headsrL   total_num_kv_headsmaxrM   head_dimq_sizekv_sizerG   key_value_groupsscalingrO   r   wqkvr   wor   
rotary_embr   attn)	r:   r*   rL   rM   rN   rO   rP   r-   r.   r;   r=   r>   r5   [   s`   
	
zInternLM2Attention.__init__qkvc                 C   s:  |j d }| jdkr=| j| j| jg| j }t|}tj||dd}|d d d |dd d  |dd d  }tj|dd}||| j	| j
d | j}tj|| j
ddgdd\}}}||| j| j }||| j| j }||| j| j }| jdkrtt| jd}||| j }||| j }||| j }|||fS )	Nr   r   )dim   r0   )num_partitions)shaperR   rX   rY   r   torchsplitcatviewrU   rZ   rW   reshaper   r   rS   )r:   r`   seq_lenqkv_mapqkvsplitterr=   r=   r>   	split_qkv   s(   

*

zInternLM2Attention.split_qkv	positionshidden_statesc           
      C   sP   |  |\}}| |\}}}| |||\}}| |||}| |\}	}|	S r?   )r\   rr   r^   r_   r]   )
r:   rs   rt   r`   rB   rn   ro   rp   attn_outputoutputr=   r=   r>   rC      s   zInternLM2Attention.forward)NrK   NNr)   )rD   rE   rF   rG   dictrH   r   r
   r   r5   rg   Tensorrr   rC   rI   r=   r=   r;   r>   rJ   Z   sB    	
CrJ   c                       sr   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )InternLMDecoderLayerNr)   configrP   r-   r.   r/   c              
      s   t    |j| _t|dd}t| j|j|j|j|||| dd| _t	| j|j
|j|| dd| _t|j|jd| _t|j|jd| _d S )NrO   rK   z
.attention)r*   rL   rM   rN   rO   rP   r-   r.   z.feed_forward)r*   r+   r,   r-   r.   eps)r4   r5   r*   getattrrJ   num_attention_headsnum_key_value_headsrN   	attentionr(   r+   r,   feed_forwardr   rms_norm_epsattention_normffn_norm)r:   rz   rP   r-   r.   rO   r;   r=   r>   r5      s,   

zInternLMDecoderLayer.__init__rs   rt   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)rs   rt   )r   r   r   r   )r:   rs   rt   r   r=   r=   r>   rC      s   
zInternLMDecoderLayer.forward)NNr)   )rD   rE   rF   r   r
   r   rH   r5   rg   rx   tuplerC   rI   r=   r=   r;   r>   ry      s0    ry   c                       s   e Zd Zdeddededee f fddZdej	d	ej	fd
dZ
		ddej	dej	dedB dej	dB d	ej	eB f
ddZ  ZS )InternLM2Modelr)   )r.   
layer_typevllm_configr.   r   c                   s   t    |jj|j |j| _j| _tjj	| _
tj fdd| dd\| _| _| _tj	jd| _tddgj	| _d S )Nc                    s    | dS )Nr.   r=   r   rP   rz   r   r-   r=   r>   <lambda>  s    z)InternLM2Model.__init__.<locals>.<lambda>z.layersr   r{   rt   r   )r4   r5   model_config	hf_configrP   r-   rz   
vocab_sizer   r*   tok_embeddingsr%   num_hidden_layersstart_layer	end_layerlayersr   r   normr$   make_empty_intermediate_tensors)r:   r   r.   r   r;   r   r>   r5      s&   


zInternLM2Model.__init__	input_idsr/   c                 C   s
   |  |S r?   )r   r:   r   r=   r=   r>   embed_input_ids  s   
zInternLM2Model.embed_input_idsNrs   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nrt   r   )rt   r   )
r   is_first_rankr   r   r   r   r   is_last_rankr   r   )	r:   r   rs   r   r   rt   r   layerrB   r=   r=   r>   rC     s    
zInternLM2Model.forwardNN)rD   rE   rF   ry   r   rH   typer5   rg   rx   r   r   rC   rI   r=   r=   r;   r>   r      s0    r   c                       s   e Zd ZdgddgdZdeddeded	ee f fd
dZde	j
de	j
fddZ	dde	j
de	j
dedB de	j
dB de	j
f
ddZde	j
de	j
dB fddZdeeee	j
f  dee fddZ  ZS )InternLM2ForCausalLMr\   w1w3)r\   r6   r)   r.   
model_typer   r.   r   c                   s   t    |jj}|j}|| _|| _||t|dd| _t|j	|j
|t|dd| _| jjr5| jjj| j_t|j	| _| jj| _d S )Nmodel)r   r.   rv   )r-   r.   )r4   r5   r   r   r-   rz   r&   r   r   r   r*   rv   tie_word_embeddingsr   weightr   logits_processorr   )r:   r   r.   r   rz   r-   r;   r=   r>   r5   >  s&   

zInternLM2ForCausalLM.__init__r   r/   c                 C   s   | j |S r?   )r   r   r   r=   r=   r>   r   \  s   z$InternLM2ForCausalLM.embed_input_idsNrs   r   r   c                 C   s   |  ||||}|S r?   )r   )r:   r   rs   r   r   rt   r=   r=   r>   rC   _  s   zInternLM2ForCausalLM.forwardrt   c                 C   s   |  | j|}|S r?   )r   rv   )r:   rt   logitsr=   r=   r>   compute_logitsk  s   z#InternLM2ForCausalLM.compute_logitsweightsc                 C   s   ddg}t |  }t }|D ]^\}}d|v rq|D ].\}}}	||vr$q|||}|dr4||vr4qt|| r:q|| }
|
j}||
||	  n|drS||vrSqt|| rYq|| }
t|
dt}||
| |	| q|S )N)r6   r   r   )r6   r   r   zrotary_emb.inv_freqz.biasweight_loader)
rw   named_parameterssetreplaceendswithr#   r   r}   r   add)r:   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r=   r=   r>   load_weightsr  s:   


z!InternLM2ForCausalLM.load_weightsr?   )rD   rE   rF   packed_modules_mappingr   r   rH   r   r5   rg   rx   r   r   rC   r   r   r   r   r   rI   r=   r=   r;   r>   r   8  s@    	

,r   ALL)tok_pooling_typec                       sr   e Zd ZdZdeddededee f fddZ				dd
e	j
de	j
ded	B de	j
d	B de	j
eB f
ddZ  ZS )InternLM2ForRewardModelTr)   r   r   r.   r   c             	      s   t | dd ttfd t j|||d W d    n1 sw   Y  |jj}|jj| _t|j	ddd| jt
|ddd| _|jj}|d usHJ t|| _d S )	Nc                 S   s
   t d| S )Nrv   )r"   )modr=   r=   r>   r     s   
 z2InternLM2ForRewardModel.__init__.<locals>.<lambda>)targets)r   r.   r   r   Fv_head)r2   input_is_parallelparams_dtyper.   return_bias)r'   r   r   r4   r5   r   r   
head_dtyper   r*   r&   r   pooler_configr   pooler)r:   r   r.   r   rz   r   r;   r=   r>   r5     s2   

z InternLM2ForRewardModel.__init__Nr   rs   r   r   r/   c                 C   s*   |  ||||}|| j}| |}|S r?   )r   tor   r   )r:   r   rs   r   r   rt   r   r=   r=   r>   rC     s   
zInternLM2ForRewardModel.forwardr   )rD   rE   rF   is_pooling_modelr   r   rH   r   r5   rg   rx   r   rC   rI   r=   r=   r;   r>   r     s0    (r   )Ccollections.abcr   	functoolsr   	itertoolsr   typingr   rg   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr	   vllm.configr
   r   vllm.distributedr   r   r   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   )vllm.model_executor.layers.pooler.tokwiser   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r    interfaces_baser!   utilsr"   r#   r$   r%   r&   r'   Moduler(   rJ   ry   r   r   r   r=   r=   r=   r>   <module>   s@    
%i6>_