o
    
۾i3                     @   sx  d Z ddlmZ ddlmZ ddlmZ ddlZddlmZ ddl	m
Z
 ddlmZ dd	lmZmZ dd
lmZmZ ddlmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlm Z  ddl!m"Z"m#Z# ddl$m%Z% ddl&m'Z' ddl(m)Z) ddl*m+Z+m,Z,m-Z-m.Z.m/Z/ G dd dej0Z1G dd dej0Z2G dd dej0Z3eG dd dej0Z4G dd  d ej0e)Z5dS )!zCInference-only Orion-14B model compatible with HuggingFace weights.    )Iterable)islice)AnyN)nn)PretrainedConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
SiluAndMul)	Attention)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                       sH   e Zd Z		ddededededB deddf fd	d
Zdd Z  ZS )OrionMLPN hidden_sizeintermediate_size
hidden_actquant_configprefixreturnc                    sh   t    t||gd d|| dd| _t||d|| dd| _|dkr.td| dt | _d S )	N   Fz.gate_up_projbiasr$   r%   z
.down_projsiluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)selfr!   r"   r#   r$   r%   	__class__ T/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/orion.pyr,   1   s(   

zOrionMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)r-   r0   r.   )r1   xgate_up_r4   r4   r5   forwardN   s   
zOrionMLP.forward)Nr    )	__name__
__module____qualname__intstrr   r,   r:   __classcell__r4   r4   r2   r5   r   0   s"    r   c                       s~   e Zd Z					ddedededeeef dB ded	edB d
edB deddf fddZ	de
jde
jde
jfddZ  ZS )OrionAttentionN    r    r!   	num_headsnum_kv_headsrope_parametersmax_position_embeddingscache_configr$   r%   r&   c	           
   	      s>  t    || _t }	|| _| j|	 dksJ | j|	 | _|| _| j|	kr/| j|	 dks.J n	|	| j dks8J td| j|	 | _|| j | _	| j| j	 | _
| j| j	 | _| j	d | _|| _t|| j	| j| jd|| dd| _t| j| j	 |d|| dd| _t| j	||d| _t| j| j	| j| j||| d	d
| _d S )Nr   r   g      Fz	.qkv_projr(   z.o_proj)max_positionrE   z.attn)rD   rG   r$   r%   )r+   r,   r!   r   total_num_headsrC   total_num_kv_headsmaxrD   head_dimq_sizekv_sizescalingrF   r   qkv_projr   o_projr   
rotary_embr   attn)
r1   r!   rC   rD   rE   rF   rG   r$   r%   tp_sizer2   r4   r5   r,   V   s\   

	
zOrionAttention.__init__	positionshidden_statesc           
      C   s`   |  |\}}|j| j| j| jgdd\}}}| |||\}}| |||}| |\}	}|	S )N)dim)rP   splitrM   rN   rR   rS   rQ   )
r1   rU   rV   qkvr9   qkvattn_outputoutputr4   r4   r5   r:      s    zOrionAttention.forward)NrB   NNr    )r;   r<   r=   r>   dictr?   r   r   r   r,   torchTensorr:   r@   r4   r4   r2   r5   rA   U   s@    	
ArA   c                       sh   e Zd Z			ddededB dedB deddf
 fdd	Zd
ej	dej	de
ej	ej	f fddZ  ZS )OrionDecoderLayerNr    configrG   r$   r%   r&   c              
      s   t    |j| _t|dd}t| j|j|j|j|||| dd| _t	| j|j
|j|| dd| _tj|j|jd| _tj|j|jd| _d S )NrF   rB   z
.self_attn)r!   rC   rD   rE   rF   rG   r$   r%   z.mlp)r!   r"   r#   r$   r%   eps)r+   r,   r!   getattrrA   num_attention_headsnum_key_value_headsrE   	self_attnr   r"   r#   mlpr   	LayerNormrms_norm_epsinput_layernormpost_attention_layernorm)r1   rd   rG   r$   r%   rF   r2   r4   r5   r,      s0   

zOrionDecoderLayer.__init__rU   rV   c                 C   sH   |}|  |}| j||d}|| }|}| |}| |}|| }|S )N)rU   rV   )rn   rj   ro   rk   )r1   rU   rV   residualr4   r4   r5   r:      s   


zOrionDecoderLayer.forward)NNr    )r;   r<   r=   r   r   r   r?   r,   ra   rb   tupler:   r@   r4   r4   r2   r5   rc      s,    !rc   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )
OrionModelr    r%   vllm_configr%   c                   s   t    |jj|j |j| _j| _tjj	| _
tj fdd| dd\| _| _| _tjj	jd| _tdgj	| _d S )Nc                    s   t  | dS )Nrs   )rc   rs   rG   rd   r$   r4   r5   <lambda>   s    z%OrionModel.__init__.<locals>.<lambda>z.layersrs   re   rV   )r+   r,   model_config	hf_configrG   r$   rd   
vocab_sizer   r!   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   rl   rm   normr   make_empty_intermediate_tensors)r1   rt   r%   r2   ru   r5   r,      s*   

zOrionModel.__init__	input_idsr&   c                 C   s
   |  |S r6   )rz   r1   r   r4   r4   r5   embed_input_ids   s   
zOrionModel.embed_input_idsNrU   intermediate_tensorsinputs_embedsc                 C   sz   t  jr|d ur|}n| |}n
|d usJ |d }t| j| j| jD ]}|||}q$t  js6td|iS | 	|}|S )NrV   )
r
   is_first_rankr   r   r~   r|   r}   is_last_rankr   r   )r1   r   rU   r   r   rV   layerr4   r4   r5   r:      s   
zOrionModel.forwardweightsc                 C   s   g d}t |  }t }|D ]Y\}}|D ].\}}}	||vrq|||}|dr/||vr/qt|| r5q|| }
|
j}||
||	  n|drN||vrNqt|| rTq|| }
t|
dt}||
| |	| q|S )N))rP   q_projr[   )rP   k_projr\   )rP   v_projr]   )r-   	gate_projr   )r-   up_projr   z.biasweight_loader)
r`   named_parameterssetreplaceendswithr   r   rg   r   add)r1   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r4   r4   r5   load_weights  s2   


zOrionModel.load_weightsr6   )r;   r<   r=   r	   r?   r,   ra   rb   r   r   r:   r   rq   r   r   r@   r4   r4   r2   r5   rr      s     
,rr   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z		ddejdB dejde	dB dejdB deje	B f
ddZ
dejdejdB fddZdeeeejf  dee fddZ  ZS )OrionForCausalLMr    rs   rt   r%   c                   s   t    |jj}|j}|| _|| _t|t|dd| _t	|j
|j|t|dd| _| jjr5| jjj| j_t|j
| _| jj| _d S )Nmodel)rt   r%   lm_head)r$   r%   )r+   r,   rw   rx   r$   rd   rr   r   r   r   ry   r!   r   tie_word_embeddingsrz   weightr   logits_processorr   )r1   rt   r%   rd   r$   r2   r4   r5   r,   ?  s&   

zOrionForCausalLM.__init__r   r&   c                 C   s   | j |S r6   )r   r   r   r4   r4   r5   r   U  s   z OrionForCausalLM.embed_input_idsNrU   r   r   c                 C   s   |  ||||}|S r6   )r   )r1   r   rU   r   r   rV   r4   r4   r5   r:   X  s   zOrionForCausalLM.forwardrV   c                 C   s   |  | j|}|S r6   )r   r   )r1   rV   logitsr4   r4   r5   compute_logitsd  s   zOrionForCausalLM.compute_logitsr   c                 C   s   t | }||S r6   )r   r   )r1   r   loaderr4   r4   r5   r   k  s   
zOrionForCausalLM.load_weights)NN)r;   r<   r=   r	   r?   r,   ra   rb   r   r   r:   r   r   rq   r   r   r@   r4   r4   r2   r5   r   >  s,    

,r   )6__doc__collections.abcr   	itertoolsr   typingr   ra   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r	   vllm.distributedr
   r   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   utilsr   r   r   r   r   Moduler   rA   rc   rr   r   r4   r4   r4   r5   <module>   s6   	%O9`