o
    i2                     @   sr  d Z ddlmZ ddlmZ ddlZddlmZ ddlmZ ddl	m
Z
 ddlmZmZ dd	lmZmZ dd
lmZ ddlmZ ddlmZmZmZ ddlmZ ddlmZ ddlmZ ddlm Z m!Z! ddl"m#Z# ddl$m%Z% ddl&m'Z'm(Z( ddl)m*Z*m+Z+m,Z,m-Z-m.Z. G dd dej/Z0G dd dej/Z1G dd dej/Z2e
G dd dej/Z3G dd dej/e'e(Z4dS ) zAInference-only Phi-1.5 model compatible with HuggingFace weights.    )Iterable)isliceN)nn)	PhiConfig)support_torch_compile)CacheConfig
VllmConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)	Attention)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHeadVocabParallelEmbedding)default_weight_loader)IntermediateTensors   )SupportsLoRA
SupportsPP)AutoWeightsLoaderis_pp_missing_parameter'make_empty_intermediate_tensors_factorymake_layersmaybe_prefixc                	       Z   e Zd Z			ddededB dedB def fddZd	ej	d
ej	dej	fddZ
  ZS )PhiAttentionN configcache_configquant_configprefixc                    s   t    |j| _| j|j | _t }|j| dksJ |j| | _t| j| j|jd|| dd| _t	| j| j|| dd| _
| jd }t|dd	}t| j||jd
| _t| j| j|||| dd| _d S )Nr   Tz	.qkv_projbiasr$   r%   z.denser$   r%   g      max_position_embeddingsi   )max_positionrope_parametersz.attn)r#   r$   r%   )super__init__hidden_sizenum_attention_heads	head_sizer
   	num_headsr   qkv_projr   densegetattrr   r+   
rotary_embr   attn)selfr"   r#   r$   r%    tensor_model_parallel_world_sizescalingr)   	__class__ T/home/ubuntu/vllm_env/lib/python3.10/site-packages/vllm/model_executor/models/phi.pyr-   O   sF   

zPhiAttention.__init__position_idshidden_statesreturnc           
      C   sT   |  |\}}|jddd\}}}| |||\}}| |||}| |\}	}|	S )N   )chunksdim)r2   chunkr5   r6   r3   )
r7   r>   r?   qkv_qkvattn_outputoutputr<   r<   r=   forward   s   zPhiAttention.forwardNNr!   __name__
__module____qualname__r   r   r   strr-   torchTensorrM   __classcell__r<   r<   r:   r=   r    N   s(    0r    c                       s<   e Zd Z		d
dededB def fddZdd	 Z  ZS )PhiMLPNr!   r"   r$   r%   c                    sp   t    t|dd }|d ur|nd|j }t|j||| dd| _t||j|| dd| _t|j	| _
d S )Nn_inner   z.fc1r(   z.fc2)r,   r-   r4   r.   r   fc1r   fc2r   
hidden_actact)r7   r"   r$   r%   rX   r:   r<   r=   r-      s    
zPhiMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)rZ   r]   r[   )r7   r?   rG   r<   r<   r=   rM      s   
zPhiMLP.forward)Nr!   )	rP   rQ   rR   r   r   rS   r-   rM   rV   r<   r<   r:   r=   rW      s    rW   c                	       r   )PhiLayerNr!   r"   r#   r$   r%   c                    sP   t    tj|j|jd| _t|||| dd| _t	||| dd| _
d S )Nepsz
.self_attnr%   z.mlp)r,   r-   r   	LayerNormr.   layer_norm_epsinput_layernormr    	self_attnrW   mlp)r7   r"   r#   r$   r%   r:   r<   r=   r-      s   
zPhiLayer.__init__r>   r?   r@   c                 C   s6   |}|  |}| j||d}| |}|| | }|S )N)r>   r?   )re   rf   rg   )r7   r>   r?   residualattn_outputsfeed_forward_hidden_statesr<   r<   r=   rM      s   

zPhiLayer.forwardrN   rO   r<   r<   r:   r=   r_      s(    r_   c                       s   e Zd Zdddedef fddZdejdejfd	d
Z	ddejdB dejde	dB dejdB deje	B f
ddZ
deeeejf  dee fddZ  ZS )PhiModelr!   rb   vllm_configr%   c                   s   t    |jj|j |j| _| _tjj	| _
tj fdd| dd\| _| _| _tjj	jd| _tdgj	| _d S )Nc                    s   t  | dS )Nrb   )r_   rb   r#   r"   r$   r<   r=   <lambda>   s    z#PhiModel.__init__.<locals>.<lambda>z.layersrb   r`   r?   )r,   r-   model_config	hf_configr#   r$   r"   r   
vocab_sizer.   embed_tokensr   num_hidden_layersstart_layer	end_layerlayersr   rc   rd   final_layernormr   make_empty_intermediate_tensors)r7   rl   r%   r:   rm   r=   r-      s(   

zPhiModel.__init__	input_idsr@   c                 C   s
   |  |S r^   )rr   r7   ry   r<   r<   r=   embed_input_ids   s   
zPhiModel.embed_input_idsN	positionsintermediate_tensorsinputs_embedsc                 C   sz   t  jr|d ur|}n| |}n
|d usJ |d }t| j| j| jD ]}|||}q$t  js6td|iS | 	|}|S )Nr?   )
r	   is_first_rankr{   r   rv   rt   ru   is_last_rankr   rw   )r7   ry   r|   r}   r~   r?   layerr<   r<   r=   rM      s   
zPhiModel.forwardweightsc                 C   s   g d}t |  }t }|D ]^\}}d|v rq|D ].\}}}	||vr$q|||}|dr4||vr4qt|| r:q|| }
|
j}||
||	  n|drS||vrSqt|| rYq|| }
t|
dt}||
| |	| q|S )N))r2   q_projrH   )r2   k_projrI   )r2   v_projrJ   zrotary_emb.inv_freqz.biasweight_loader)
dictnamed_parameterssetreplaceendswithr   r   r4   r   add)r7   r   stacked_params_mappingparams_dictloaded_paramsnameloaded_weight
param_nameweight_nameshard_idparamr   r<   r<   r=   load_weights  s6   


zPhiModel.load_weightsr^   )rP   rQ   rR   r   rS   r-   rT   rU   r{   r   rM   r   tupler   r   rV   r<   r<   r:   r=   rk      s     
,rk   c                       s   e Zd Zdg diZdddedef fddZd	ejd
ejfddZ			dd	ejdB dejde
dB dejdB d
eje
B f
ddZdejd
ejdB fddZdeeeejf  d
ee fddZ  ZS )PhiForCausalLMr2   )r   r   r   r!   rb   rl   r%   c                   sz   t    |jj}|j}|| _|jrJ || _t|t|dd| _	t
|j|jd|t|dd| _t|j| _| j	j| _d S )Nmodel)rl   r%   Tlm_headr&   )r,   r-   ro   rp   r$   r"   tie_word_embeddingsrk   r   r   r   rq   r.   r   r   logits_processorrx   )r7   rl   r%   r"   r$   r:   r<   r=   r-   7  s&   


zPhiForCausalLM.__init__ry   r@   c                 C   s   | j |S r^   )r   r{   rz   r<   r<   r=   r{   R  s   zPhiForCausalLM.embed_input_idsNr|   r}   r~   c                 C   s   |  ||||}|S r^   )r   )r7   ry   r|   r}   r~   r?   r<   r<   r=   rM   U  s   zPhiForCausalLM.forwardr?   c                 C   s   |  | j|| jj}|S r^   )r   r   r'   )r7   r?   logitsr<   r<   r=   compute_logitsb  s   zPhiForCausalLM.compute_logitsr   c                 C   s   t | }||S r^   )r   r   )r7   r   loaderr<   r<   r=   r   i  s   
zPhiForCausalLM.load_weights)NN)rP   rQ   rR   packed_modules_mappingr   rS   r-   rT   rU   r{   r   rM   r   r   r   r   r   rV   r<   r<   r:   r=   r   .  s0    

,r   )5__doc__collections.abcr   	itertoolsr   rT   r   transformersr   vllm.compilation.decoratorsr   vllm.configr   r   vllm.distributedr	   r
   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   !vllm.model_executor.layers.linearr   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr   r   -vllm.model_executor.model_loader.weight_utilsr   vllm.sequencer   
interfacesr   r   utilsr   r   r   r   r   Moduler    rW   r_   rk   r   r<   r<   r<   r=   <module>   s4   &	>!!_