o
    -i;4                     @   sp  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. G dd dej/Z0G dd dej/Z1G dd dej/Z2eG dd dej/Z3G dd dej/e(Z4dS ) zPyTorch Starcoder2 model.    )Iterable)isliceN)nn)Starcoder2Config)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loadermaybe_remap_kv_scale_name)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       Z   e Zd Z			ddededB dedB def fddZd	ej	d
ej	dej	fddZ
  ZS )Starcoder2AttentionN configcache_configquant_configprefixc              	      sd  t    || _|j| _t }|j| _| j| dksJ | j| | _|j| _	| j	|kr5| j	| dks4J n	|| j	 dks>J t
d| j	| | _| j| j | _| j| j | _| j| j | _| jd | _|j| _|j| _t| j| j| j| j	| j|| dd| _t| j| j | j| j|| dd| _t| j| j|jdd| _t| j| j| j| j||| d	d
| _d S )Nr   r   g      z	.qkv_projbiasr$   r%   z.o_projT)max_positionrope_parametersis_neox_stylez.attn)num_kv_headsr#   r$   r%   )super__init__r"   hidden_sizer   num_attention_headstotal_num_heads	num_headsnum_key_value_headstotal_num_kv_headsmaxr+   head_dimq_sizekv_sizescalingmax_position_embeddingsuse_biasr   qkv_projr   o_projr   r)   
rotary_embr   attn)selfr"   r#   r$   r%   tp_size	__class__ b/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/starcoder2.pyr-   A   sb   

	
zStarcoder2Attention.__init__	positionshidden_statesreturnc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)r;   splitr6   r7   r=   r>   r<   )
r?   rE   rF   qkv_qkvattn_outputoutputrC   rC   rD   forward   s    zStarcoder2Attention.forwardNNr!   __name__
__module____qualname__r   r   r   strr-   torchTensorrR   __classcell__rC   rC   rA   rD   r    @   s(    @r    c                       sJ   e Zd Z		ddededB def fddZdejd	ejfd
dZ	  Z
S )Starcoder2MLPNr!   r"   r$   r%   c                    sZ   t    t|j|j|j|| dd| _t|j|j|j|| dd| _t	|j
| _d S )Nz.c_fcr&   z.c_proj)r,   r-   r   r.   intermediate_sizer:   c_fcr   c_projr   
hidden_actact)r?   r"   r$   r%   rA   rC   rD   r-      s    
zStarcoder2MLP.__init__rF   rG   c                 C   s*   |  |\}}| |}| |\}}|S N)r^   ra   r_   )r?   rF   rL   rC   rC   rD   rR      s   
zStarcoder2MLP.forward)Nr!   )rU   rV   rW   r   r   rX   r-   rY   rZ   rR   r[   rC   rC   rA   rD   r\      s    r\   c                	       r   )Starcoder2DecoderLayerNr!   r"   r#   r$   r%   c                    sl   t    |j| _t|||| dd| _t||| dd| _tj|j|j	d| _
tj|j|j	d| _d S )Nz
.self_attnr$   r%   z.mlpeps)r,   r-   r.   r    	self_attnr\   mlpr   	LayerNormnorm_epsiloninput_layernormpost_attention_layernorm)r?   r"   r#   r$   r%   rA   rC   rD   r-      s   
zStarcoder2DecoderLayer.__init__rE   rF   rG   c                 C   sH   |}|  |}| j||d}|| }|}| |}| |}|| }|S )N)rE   rF   )rk   rg   rl   rh   )r?   rE   rF   residualrC   rC   rD   rR      s   


zStarcoder2DecoderLayer.forwardrS   rT   rC   rC   rA   rD   rc      s(    rc   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )Starcoder2Modelr!   r%   vllm_configr%   c                   s   t    |jj|j |j| _j| _tjj	| dd| _
tj fdd| dd\| _| _| _tjj	jd| _tdgj	| _d S )	Nz.embed_tokensrd   c                    s   t  | dS )Nrd   )rc   ro   r#   r"   r$   rC   rD   <lambda>   s    z*Starcoder2Model.__init__.<locals>.<lambda>z.layersro   re   rF   )r,   r-   model_config	hf_configr#   r$   r"   
vocab_sizer   r.   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   ri   rj   normr   make_empty_intermediate_tensors)r?   rp   r%   rA   rq   rD   r-      s*   

zStarcoder2Model.__init__	input_idsrG   c                 C   s
   |  |S rb   )rv   r?   r}   rC   rC   rD   embed_input_ids   s   
zStarcoder2Model.embed_input_idsNrE   intermediate_tensorsinputs_embedsc                 C   sz   t  jr|d ur|}n| |}n
|d usJ |d }t| j| j| jD ]}|||}q$t  js6td|iS | 	|}|S )NrF   )
r
   is_first_rankr   r   rz   rx   ry   is_last_rankr   r{   )r?   r}   rE   r   r   rF   layerrC   rC   rD   rR      s   
zStarcoder2Model.forwardweightsc                 C   s   g d}t | jdd}t }|D ]O\}}|D ]$\}}}	||vr!q|||}t|| r-q|| }
|
j}||
||	  nt||}|d u rFqt|| rLq|| }
t|
dt}||
| |	| q|S )N))r;   q_projrM   )r;   k_projrN   )r;   v_projrO   F)remove_duplicateweight_loader)
dictnamed_parameterssetreplacer   r   r   getattrr   add)r?   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   rC   rC   rD   load_weights  s0   



zStarcoder2Model.load_weightsrb   )rU   rV   rW   r	   rX   r-   rY   rZ   r   r   rR   r   tupler   r   r[   rC   rC   rA   rD   rn      s     
,rn   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )Starcoder2ForCausalLMr!   ro   rp   r%   c                   s   t    |jj}|j}|| _t|t|dd| _|j	| _	|j
r&| jj| _nt|j	|j|| dd| _t|j	| _| jj| _d S )Nmodel)rp   r%   z.lm_headrd   )r,   r-   rs   rt   r$   r"   rn   r   r   ru   tie_word_embeddingsrv   lm_headr   r.   r   logits_processorr|   )r?   rp   r%   r"   r$   rA   rC   rD   r-   6  s&   

zStarcoder2ForCausalLM.__init__r}   rG   c                 C   s   | j |S rb   )r   r   r~   rC   rC   rD   r   N  s   z%Starcoder2ForCausalLM.embed_input_idsNrE   r   r   c                 C   s   |  ||||}|S rb   )r   )r?   r}   rE   r   r   rF   rC   rC   rD   rR   Q  s   zStarcoder2ForCausalLM.forwardrF   c                 C   s   |  | j|}|S rb   )r   r   )r?   rF   logitsrC   rC   rD   compute_logits]  s   z$Starcoder2ForCausalLM.compute_logitsr   c                 C   s$   t | | jjr	dgnd d}||S )Nzlm_head.weight)skip_prefixes)r   r"   r   r   )r?   r   loaderrC   rC   rD   r   d  s
   
z"Starcoder2ForCausalLM.load_weights)NN)rU   rV   rW   r	   rX   r-   rY   rZ   r   r   rR   r   r   r   r   r   r[   rC   rC   rA   rD   r   5  s,    

,r   )5__doc__collections.abcr   	itertoolsr   rY   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler    r\   rc   rn   r   rC   rC   rC   rD   <module>   s4   	N/X