o
    if=                     @   s  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZmZ ddlmZ ddlmZ ddlmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z&m'Z' ddl(m)Z)m*Z*m+Z+m,Z,m-Z-m.Z. G dd dej/Z0G dd dej1Z2G dd dej1Z3G dd dej1Z4e
G dd dej1Z5G dd  d ej1e'e&Z6dS )!z=Inference-only OPT model compatible with HuggingFace weights.    )Iterable)isliceN)nn)	OPTConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)	Attention)ColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderWeightsMapperis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       s:   e Zd Zdedef fddZdejf fddZ  ZS )OPTLearnedPositionalEmbeddingnum_embeddingsembedding_dimc                    s   d| _ t || j  | d S )N   )offsetsuper__init__)selfr!   r"   	__class__ T/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/opt.pyr&   >   s   z&OPTLearnedPositionalEmbedding.__init__	positionsc                    s   t  || j S N)r%   forwardr$   )r'   r,   r(   r*   r+   r.   E   s   z%OPTLearnedPositionalEmbedding.forward)	__name__
__module____qualname__intr&   torchTensorr.   __classcell__r*   r*   r(   r+   r    =   s    r    c                       sb   e Zd Z				ddededededB dedB d	ed
df fddZde	j
d
e	j
fddZ  ZS )OPTAttentionTN 	embed_dim	num_headsbiascache_configquant_configprefixreturnc           	         s   t    || _t }|}|| dksJ || | _|| | _| jd | _t|| j|||| dd| _t	||||| dd| _
t| j| j| j||| dd| _d S )Nr   g      z	.qkv_projr:   r<   r=   z	.out_projz.attn)scaler;   r<   r=   )r%   r&   r8   r
   r9   head_dimscalingr   qkv_projr   out_projr   attn)	r'   r8   r9   r:   r;   r<   r=    tensor_model_parallel_world_sizetotal_num_headsr(   r*   r+   r&   J   s>   
	

zOPTAttention.__init__hidden_statesc           	      C   sB   |  |\}}|jddd\}}}| |||}| |\}}|S )N   )chunksdim)rC   chunkrE   rD   )	r'   rH   qkv_qkvattn_outputoutputr*   r*   r+   r.   t   s
   zOPTAttention.forward)TNNr7   )r/   r0   r1   r2   boolr   r   strr&   r3   r4   r.   r5   r*   r*   r(   r+   r6   I   s2    *r6   c                	       sT   e Zd Z			ddededB dedB def fddZd	ej	d
ej	fddZ
  ZS )OPTDecoderLayerNr7   configr;   r<   r=   c                    s   t    || _|j| _t| j|j|j||| dd| _|j	| _	t
j| j|jd| _t| j|j|j|| dd| _t|j| _t|j| j|j|| dd| _t
j| j|jd| _d S )Nz
.self_attn)r8   r9   r:   r;   r<   r=   elementwise_affinez.fc1r?   z.fc2)r%   r&   rX   hidden_sizer8   r6   num_attention_headsenable_bias	self_attndo_layer_norm_beforer   	LayerNormlayer_norm_elementwise_affineself_attn_layer_normr   ffn_dimfc1r   activation_functionactivation_fnr   fc2final_layer_normr'   rX   r;   r<   r=   r(   r*   r+   r&      sB   
zOPTDecoderLayer.__init__rH   r>   c                 C   s   |}| j r
| |}| j|d}|| }| j s| |}|}| j r&| |}| |\}}| |}| |\}}|| }| j sE| |}|S )N)rH   )r_   rb   r^   rh   rd   rf   rg   )r'   rH   residualrO   r*   r*   r+   r.      s"   




zOPTDecoderLayer.forwardNNr7   )r/   r0   r1   r   r   r   rV   r&   r3   r4   r.   r5   r*   r*   r(   r+   rW      s$    *rW   c                       s   e Zd Z			ddededB dedB def fddZd	ej	d
ej	fddZ
	dd	ej	dB dej	dedB dej	dB d
ej	eB f
ddZ  ZS )
OPTDecoderNr7   rX   r;   r<   r=   c                    s   t    | _j| _j| _tjj| _t	jj
| _jj
kr6tj
jd| dd| _nd | _jj
krOtjj
d| dd| _nd | _jrcjsctjj
jd| _nd | _tj fdd| dd	\| _| _| _d S )
NFz.project_outr?   z.project_inrY   c                    s   t  | dS )Nr=   )rW   rm   r;   rX   r<   r*   r+   <lambda>  s    z%OPTDecoder.__init__.<locals>.<lambda>z.layersrm   )r%   r&   rX   max_position_embeddingsmax_target_positions
vocab_sizer   word_embed_proj_dimembed_tokensr    r[   embed_positionsr   project_out
project_inr_   _remove_final_layer_normr   r`   ra   rh   r   num_hidden_layersstart_layer	end_layerlayersri   r(   rn   r+   r&      sP   



zOPTDecoder.__init__	input_idsr>   c                 C   s
   |  |S r-   )rt   r'   r}   r*   r*   r+   embed_input_ids	  s   
zOPTDecoder.embed_input_idsr,   intermediate_tensorsinputs_embedsc           	      C   s   t  jr#|d u r| |}| |}| jd ur| |\}}|| }n
|d us)J |d }t| j| j| jD ]}||}q6t  j	sGt
d|iS | jd urQ| |}| jd ur]| |\}}|S )NrH   )r	   is_first_rankr   ru   rw   r   r|   rz   r{   is_last_rankr   rh   rv   )	r'   r}   r,   r   r   
pos_embedsrO   rH   layerr*   r*   r+   r.     s$   







zOPTDecoder.forwardrk   r-   )r/   r0   r1   r   r   r   rV   r&   r3   r4   r   r   r.   r5   r*   r*   r(   r+   rl      s4    @rl   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )OPTModelr7   rm   vllm_configr=   c                   sJ   t    |jj}|j}|j}t|||| dd| _tdg|j	| _
d S )Nz.decoderrm   rH   )r%   r&   model_config	hf_configr;   r<   rl   decoderr   r[   make_empty_intermediate_tensors)r'   r   r=   rX   r;   r<   r(   r*   r+   r&   ,  s   

zOPTModel.__init__r}   r>   c                 C      | j |S r-   )r   r   r~   r*   r*   r+   r   :     zOPTModel.embed_input_idsNr,   r   r   c                 C   s   | j ||||dS )N)r   )r   )r'   r}   r,   r   r   r*   r*   r+   r.   =  s   zOPTModel.forwardweightsc                 C   s   g d}t | jdd}t }|D ]Y\}}|D ].\}}}	||vr!q|||}|dr1||vr1qt|| r7q|| }
|
j}||
||	  n|drP||vrPqt|| rVq|| }
t|
dt}||
| |	| q|S )N))rC   q_projrP   )rC   k_projrQ   )rC   v_projrR   F)remove_duplicatez.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   getattrr   add)r'   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r*   r*   r+   load_weightsH  s2   


zOPTModel.load_weightsr-   )r/   r0   r1   r   rV   r&   r3   r4   r   r   r.   r   tupler   r   r5   r*   r*   r(   r+   r   *  s     
,r   c                       s   e Zd Zdg diZeddidZddded	ef fd
dZde	j
de	j
fddZ		dde	j
dB de	j
dedB de	j
dB de	j
eB f
ddZde	j
de	j
dB fddZdeeee	j
f  dee fddZ  ZS )OPTForCausalLMrC   )r   r   r   zdecoder.zmodel.decoder.)orig_to_new_prefixr7   rm   r   r=   c                   s   t    |jj}|j}|| _|| _t|t|dd| _| jj	r'| jj
j| _nt|j|jt|dd| _t|j| _| jj| _d S )Nmodel)r   r=   lm_headrm   )r%   r&   r   r   r<   rX   r   r   r   tie_word_embeddingsr   rt   r   r   rr   rs   r   logits_processorr   )r'   r   r=   rX   r<   r(   r*   r+   r&   w  s$   

zOPTForCausalLM.__init__r}   r>   c                 C   r   r-   )r   r   r~   r*   r*   r+   r     r   zOPTForCausalLM.embed_input_idsNr,   r   r   c                 C   s   |  ||||}|S r-   )r   )r'   r}   r,   r   r   rH   r*   r*   r+   r.     s   zOPTForCausalLM.forwardrH   c                 C   s   |  | j|}|S r-   )r   r   )r'   rH   logitsr*   r*   r+   compute_logits  s   zOPTForCausalLM.compute_logitsr   c                 C   s*   t | | jjr	dgnd d}|j|| jdS )Nzlm_head.weight)skip_prefixes)mapper)r   rX   r   r   hf_to_vllm_mapper)r'   r   loaderr*   r*   r+   r     s
   zOPTForCausalLM.load_weights)NN)r/   r0   r1   packed_modules_mappingr   r   r   rV   r&   r3   r4   r   r   r.   r   r   r   r   r   r5   r*   r*   r(   r+   r   l  s8    

,r   )7__doc__collections.abcr   	itertoolsr   r3   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   r   	Embeddingr    Moduler6   rW   rl   r   r   r*   r*   r*   r+   <module>   s4    
6IbA