o
    
۾i/                     @   sf  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZmZ ddl m!Z! ddl"m#Z# ddl$m%Z%m&Z& ddl'm(Z(m)Z)m*Z*m+Z+m,Z, G dd dej-Z.G dd dej-Z/G dd dej-Z0e
G dd dej-Z1G dd dej-e%e&Z2dS )zDInference-only GPTBigCode model compatible with HuggingFace weights.    )Iterable)isliceN)nn)GPTBigCodeConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       T   e Zd Z			ddededB dedB def fddZd	ej	d
ej	fddZ
  ZS )GPTBigCodeAttentionN configcache_configquant_configprefixc              	      s   t    |j| _|j}t | _|| j dksJ || j | _| j| | _| jd | _|j	| _	| j	r8d}d| _
n|}| j| _
| j| j
 | _t| j| j||d|| dd| _t| j| jd|| dd| _t| j| j| j| j
||| dd	| _d S )
Nr   g      r   Tz.c_attnbiasr#   r$   .c_proj.attn)scalenum_kv_headsr"   r#   r$   )super__init__hidden_sizenum_attention_headsr
    tensor_model_parallel_world_size	num_headshead_dimr)   multi_queryr*   kv_dimr   c_attnr   c_projr   attn)selfr!   r"   r#   r$   total_num_headstotal_num_kv_heads	__class__ Z/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/gpt_bigcode.pyr,   =   sP   

zGPTBigCodeAttention.__init__hidden_statesreturnc                 C   sT   |  |\}}|j| j| j | j| jgdd\}}}| |||}| |\}}|S )N)dim)r4   splitr-   r/   r3   r6   r5   )r7   r>   qkv_qkvattn_outputr<   r<   r=   forwardp   s   
zGPTBigCodeAttention.forwardNNr    __name__
__module____qualname__r   r   r   strr,   torchTensorrI   __classcell__r<   r<   r:   r=   r   <   s$    3r   c                	       sN   e Zd Z		ddedededB def fddZd	ej	d
ej	fddZ
  ZS )	GPTBigMLPNr    intermediate_sizer!   r#   r$   c                    sT   t    |j}t||d|| dd| _t||d|| dd| _t|j| _	d S )NTz.c_fcr%   r'   )
r+   r,   r-   r   c_fcr   r5   r   activation_functionact)r7   rT   r!   r#   r$   r-   r:   r<   r=   r,      s"   
zGPTBigMLP.__init__r>   r?   c                 C   s*   |  |\}}| |}| |\}}|S N)rU   rW   r5   )r7   r>   rD   r<   r<   r=   rI      s   
zGPTBigMLP.forward)Nr    )rL   rM   rN   intr   r   rO   r,   rP   rQ   rI   rR   r<   r<   r:   r=   rS      s    rS   c                	       r   )GPTBigCodeBlockNr    r!   r"   r#   r$   c                    s   t    |j}|jd ur|jnd| }tj||jd| _t|||| dd| _	tj||jd| _
t|||| dd| _d S )N   epsr(   r$   z.mlp)r+   r,   r-   n_innerr   	LayerNormlayer_norm_epsilonln_1r   r6   ln_2rS   mlp)r7   r!   r"   r#   r$   r-   	inner_dimr:   r<   r=   r,      s   
zGPTBigCodeBlock.__init__r>   r?   c                 C   sF   |}|  |}| j|d}|| }|}| |}| |}|| }|S )N)r>   )rb   r6   rc   rd   )r7   r>   residualrH   feed_forward_hidden_statesr<   r<   r=   rI      s   


zGPTBigCodeBlock.forwardrJ   rK   r<   r<   r:   r=   rZ      s$    rZ   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )GPTBigCodeModelr    r^   vllm_configr$   c                   s   t    |jj|j |j| _jrJ j| _	j
| _
t| j
| j	j
d| _tj| j	| _tj fdd| dd\| _| _| _tj| j	jd| _tdgj| _d S )N)org_num_embeddingsc                    s   t  | dS )Nr^   )rZ   r^   r"   r!   r#   r<   r=   <lambda>   s    z*GPTBigCodeModel.__init__.<locals>.<lambda>z.hr^   r\   r>   )r+   r,   model_config	hf_configr"   r#   r!   add_cross_attentionr-   	embed_dim
vocab_sizer   wter   	Embeddingmax_position_embeddingswper   num_hidden_layersstart_layer	end_layerhr`   ra   ln_fr   n_embdmake_empty_intermediate_tensors)r7   ri   r$   r:   rk   r=   r,      s*   


zGPTBigCodeModel.__init__	input_idsr?   c                 C   s
   |  |S rX   )rr   r7   r}   r<   r<   r=   embed_input_ids   s   
zGPTBigCodeModel.embed_input_idsNposition_idsintermediate_tensorsinputs_embedsc                 C   st   t  jr|d u r| |}|| | }n|d }t| j| j| jD ]}||}q"t  js3t	d|iS | 
|}|S )Nr>   )r	   is_first_rankr   ru   r   ry   rw   rx   is_last_rankr   rz   )r7   r}   r   r   r   r>   layerr<   r<   r=   rI      s   


zGPTBigCodeModel.forwardweightsc                 C   s   t | jdd}t }|D ]:\}}d|v rqt|| rq|| }t|dt}d|v r=|||d |||d |||d n||| || q|S )	NF)remove_duplicatez
.attn.biasweight_loaderzc_attn.input_scalerE   rF   rG   )dictnamed_parameterssetr   getattrr   add)r7   r   params_dictloaded_paramsnameloaded_weightparamr   r<   r<   r=   load_weights  s    

zGPTBigCodeModel.load_weightsrX   )rL   rM   rN   r   rO   r,   rP   rQ   r   r   rI   r   tupler   r   rR   r<   r<   r:   r=   rh      s     
,rh   c                       s   e Zd ZddgiZdddedef fddZdejd	ejfd
dZ			ddejdB dejde
dB dejdB d	eje
B f
ddZdejd	ejdB fddZdeeeejf  d	ee fddZ  ZS )GPTBigCodeForCausalLMr4   r    r^   ri   r$   c                   s   t    |jj}|j}|| _|| _t|t|dd| _| jj	r&| jj
| _nt| jj| jjt|dd| _t|j| _| jj| _d S )Ntransformer)ri   r$   lm_headr^   )r+   r,   rm   rn   r#   r!   rh   r   r   tie_word_embeddingsrr   r   r   rq   rp   r   logits_processorr|   )r7   ri   r$   r!   r#   r:   r<   r=   r,     s$   

zGPTBigCodeForCausalLM.__init__r}   r?   c                 C   s   | j |S rX   )r   r   r~   r<   r<   r=   r   5  s   z%GPTBigCodeForCausalLM.embed_input_idsN	positionsr   r   c                 C   s   |  ||||}|S rX   )r   )r7   r}   r   r   r   r>   r<   r<   r=   rI   8  s   zGPTBigCodeForCausalLM.forwardr>   c                 C   s   |  | j|}|S rX   )r   r   )r7   r>   logitsr<   r<   r=   compute_logitsD  s   z$GPTBigCodeForCausalLM.compute_logitsr   c                 C   s(   d }| j jr	dg}t| |d}||S )Nzlm_head.)skip_prefixes)r!   r   r   r   )r7   r   r   loaderr<   r<   r=   r   K  s   
z"GPTBigCodeForCausalLM.load_weights)NN)rL   rM   rN   packed_modules_mappingr   rO   r,   rP   rQ   r   r   rI   r   r   r   r   r   rR   r<   r<   r:   r=   r     s.    


,r   )3__doc__collections.abcr   	itertoolsr   rP   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler   rS   rZ   rh   r   r<   r<   r<   r=   <module>   s2   	F!'N