o
    -iE                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	 ddl
mZ ddlmZ dd	lmZmZ dd
lmZmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlm Z  ddl!m"Z" ddl#m$Z$m%Z% ddl&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0m1Z1m2Z2m3Z3m4Z4 de5dej6fddZ7G dd dej8Z9G dd dej8Z:G dd  d ej8Z;eG d!d" d"ej8Z<G d#d$ d$ej8e,e-e.Z=G d%d& d&e=Z>G d'd( d(e=Z?dS ))zBInference-only BaiChuan model compatible with HuggingFace weights.    N)Iterable)islice)nn)PretrainedConfig)	Attention)support_torch_compile)CacheConfig
VllmConfig)get_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size)
SiluAndMul)RMSNorm)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loaderrow_parallel_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPPSupportsQuant)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixtotal_num_headsreturnc                 C   s   dt t |  }tjddt |d     tjd}tjdd| tjd}t||}|| kritjddt d| d     tjd}t	|| | }tjddd|  dtjd}tj
|t||gdd}|S )N      )dtyper   )startendstepr'   r   )dim)mathfloorlog2torchtensorfloat32arangeint32powmincat)r#   closest_power_of_2basepowersslopes
extra_basenum_remaining_headsextra_powers r>   `/home/ubuntu/veenaModal/venv/lib/python3.10/site-packages/vllm/model_executor/models/baichuan.py_get_alibi_slopesF   s(   r@   c                       sD   e Zd Z		ddededededB def
 fdd	Zd
d Z  ZS )BaiChuanMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixc                    sh   t    t||gd d|| dd| _t||d|| dd| _|dkr.td| dt | _d S )	Nr%   Fz.gate_up_projbiasrF   rG   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfrC   rD   rE   rF   rG   	__class__r>   r?   rL   _   s(   

zBaiChuanMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)rM   rP   rN   )rQ   xgate_up_r>   r>   r?   forward|   s   
zBaiChuanMLP.forward)NrB   )	__name__
__module____qualname__intstrr   rL   rX   __classcell__r>   r>   rR   r?   rA   ^   s    rA   c                       sp   e Zd ZdZ				ddedededed	ed
edB dedB def fddZ	de
jde
jde
jfddZ  ZS )BaiChuanAttentionz=Multi-headed attention from 'Attention Is All You Need' paper    NrB   rC   	num_headsposition_embeddingrope_parametersmax_position_embeddingscache_configrF   rG   c	              	      sN  t    || _t }	|| _| j|	 dksJ | j|	 | _|| j | _|| _|| _t	|| j| j| jd|| dd| _
t| j| j |d|| dd| _| jdkrt }
|
| j }|
d | j }t| j}|||  }| jd }t| j| j|||| d	d
| _d S t| j| j|d| _| jd | _t| j| j| j||| d	d| _d S )Nr   Fz.W_packrH   z.o_projALIBIr   g      z.attn)alibi_slopesrF   rG   )max_positionrc   )re   rF   rG   )rK   rL   rC   r   r#   ra   head_dimrb   rd   r   W_packr   o_projr   r@   tolistr   attnr   
rotary_embscaling)rQ   rC   ra   rb   rc   rd   re   rF   rG    tensor_model_parallel_world_sizetp_rank
head_starthead_endrg   ro   rR   r>   r?   rL      sl   
	




	zBaiChuanAttention.__init__	positionshidden_statesr$   c           
      C   s^   |  |\}}|jddd\}}}| jdkr| |||\}}| |||}| |\}	}|	S )Nr&   )chunksr+   rf   )rj   chunkrb   rn   rm   rk   )
rQ   rt   ru   qkvrW   qkvattn_outputoutputr>   r>   r?   rX      s   
zBaiChuanAttention.forward)r`   NNrB   )rY   rZ   r[   __doc__r\   r]   dictr   r   rL   r/   TensorrX   r^   r>   r>   rR   r?   r_      s<    	Gr_   c                       sr   e Zd Z			ddedededB dedB def
 fdd	Zd
ej	dej	dej	dB de
ej	ej	f fddZ  ZS )BaiChuanDecoderLayerNrB   configrb   re   rF   rG   c              
      s   t    |j| _t|dd}t| j|j|t|dd |||| dd| _t| j|j|j	|| dd| _
t|j|jd| _t|j|jd| _d S )	Nrd   r`   rc   z
.self_attn)rC   ra   rb   rc   rd   re   rF   rG   z.mlp)rC   rD   rE   rF   rG   eps)rK   rL   rC   getattrr_   num_attention_heads	self_attnrA   rD   rE   mlpr   rms_norm_epsinput_layernormpost_attention_layernorm)rQ   r   rb   re   rF   rG   rd   rR   r>   r?   rL      s0   


zBaiChuanDecoderLayer.__init__rt   ru   residualr$   c                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)rt   ru   )r   r   r   r   )rQ   rt   ru   r   r>   r>   r?   rX      s   
zBaiChuanDecoderLayer.forward)NNrB   )rY   rZ   r[   r   r]   r   r   rL   r/   r   tuplerX   r^   r>   r>   rR   r?   r      s0    !r   c                       s   e Zd Z		ddedededdf fdd	Zd
ejdejfddZ	dd
ejdejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )BaiChuanModelrB   ROPEvllm_configrG   rb   r$   Nc                    s   t    |jj|j |j| _j| _tjj	| _
tj fdd| dd\| _| _| _tj	jd| _tddgj	| _d S )Nc                    s   t  | dS )NrG   )r   r   re   r   rb   rF   r>   r?   <lambda>+  s    
z(BaiChuanModel.__init__.<locals>.<lambda>z.layersr   r   ru   r   )rK   rL   model_config	hf_configre   rF   r   
vocab_sizer   rC   embed_tokensr!   num_hidden_layersstart_layer	end_layerlayersr   r   normr    make_empty_intermediate_tensors)rQ   r   rG   rb   rR   r   r?   rL     s&   


zBaiChuanModel.__init__	input_idsc                 C   s
   |  |S rT   )r   rQ   r   r>   r>   r?   embed_input_ids5  s   
zBaiChuanModel.embed_input_idsrt   intermediate_tensorsinputs_embedsc           	      C   s   t  jr|d ur|}n| |}d }n|d usJ |d }|d }t| j| j| jD ]
}||||\}}q*t  js@t||dS | 	||\}}|S )Nru   r   )ru   r   )
r
   is_first_rankr   r   r   r   r   is_last_rankr   r   )	rQ   r   rt   r   r   ru   r   layerrW   r>   r>   r?   rX   8  s,   

zBaiChuanModel.forwardweightsc                 C   s   ddg}t |  }t }|D ]^\}}d|v rq|D ].\}}}	||vr$q|||}|dr4||vr4qt|| r:q|| }
|
j}||
||	  n|drS||vrSqt|| rYq|| }
t|
dt}||
| |	| q|S )N)rM   	gate_projr   )rM   up_projr   zrotary_emb.inv_freqz.biasweight_loader)
r   named_parameterssetreplaceendswithr   r   r   r   add)rQ   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r>   r>   r?   load_weightsY  s:   


zBaiChuanModel.load_weights)rB   r   rT   )rY   rZ   r[   r	   r]   rL   r/   r   r   r   rX   r   r   r   r   r^   r>   r>   rR   r?   r     s4    
,!r   c                       s   e Zd ZdgddgdZdddded	ed
ef fddZdejdejfddZ			d!dejdejde
dB dejdB deje
B f
ddZdejdejdB fddZdeeeejf  dee fddZdejdejfdd Z  ZS )"BaiChuanBaseForCausalLMrj   r   r   )rj   rM   rB   r   )rG   rb   r   rG   rb   c                   s   t    |jj}|j}|| _t | _|| _t|||d| _	t
|j|j|t|dd| _| j| jj_| jjr=| j	jj| j_t|j| _| j	j| _d S )Nr   rG   rb   lm_head)rF   rG   )rK   rL   r   r   rF   r   r   tp_sizer   modelr   r   rC   r"   r   lm_head_weight_loaderweightr   tie_word_embeddingsr   r   logits_processorr   )rQ   r   rG   rb   r   rF   rR   r>   r?   rL     s.   
z BaiChuanBaseForCausalLM.__init__r   r$   c                 C   s   | j |S rT   )r   r   r   r>   r>   r?   r     s   z'BaiChuanBaseForCausalLM.embed_input_idsNrt   r   r   c                 C   s   |  ||||}|S rT   )r   )rQ   r   rt   r   r   ru   r>   r>   r?   rX     s   zBaiChuanBaseForCausalLM.forwardru   c                 C   s   |  | j|}|S rT   )r   r   )rQ   ru   logitsr>   r>   r?   compute_logits  s   z&BaiChuanBaseForCausalLM.compute_logitsr   c                 C   s   t | }||S rT   )r   r   )rQ   r   loaderr>   r>   r?   r     s   
z$BaiChuanBaseForCausalLM.load_weightsr   r   c                 C   sD   | j jdk}|rtjj|}| jdkrt|| d S t|| d S )Ni  r   )	r   r   r/   r   
functional	normalizer   r   r   )rQ   r   r   is_baichuan2r>   r>   r?   r     s   
z-BaiChuanBaseForCausalLM.lm_head_weight_loader)NN)rY   rZ   r[   packed_modules_mappingr	   r]   rL   r/   r   r   r   rX   r   r   r   r   r   r   	Parameterr   r^   r>   r>   rR   r?   r     sH    "

$r   c                       0   e Zd ZdZdddedef fddZ  ZS )BaichuanForCausalLMzVBaichuan 13B and Baichuan2 7B/13B.
    NOTE: the class name has a lower case 'c'.
    rB   r   r   rG   c                   s>   |j j}|jdkrt j||dd d S t j||dd d S )Ni   r   r   rf   )r   r   rC   rK   rL   )rQ   r   rG   r   rR   r>   r?   rL     s   


zBaichuanForCausalLM.__init__rY   rZ   r[   r   r	   r]   rL   r^   r>   r>   rR   r?   r         $r   c                       r   )BaiChuanForCausalLMzABaichuan 7B.
    NOTE: the class name has an upper case 'C'.
    rB   r   r   rG   c                   s   t  j||dd d S )Nr   r   )rK   rL   )rQ   r   rG   rR   r>   r?   rL     s   
zBaiChuanForCausalLM.__init__r   r>   r>   rR   r?   r     r   r   )@r   r,   collections.abcr   	itertoolsr   r/   r   transformersr   vllm.attention.layerr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   r   vllm.sequencer   
interfacesr   r   r   utilsr   r   r    r!   r"   r\   r   r@   ModulerA   r_   r   r   r   r   r   r>   r>   r>   r?   <module>   s>   	%X9jU