o
    پi((                     @   s.  d dl mZmZ d dlZd dlmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZmZmZ d dlmZmZ d d	lmZ d d
lmZ d dlmZ d dlmZmZ d dlmZ d dlmZ d dl m!Z!m"Z" G dd dej#Z$G dd dej#Z%G dd dej#Z&G dd dej#Z'G dd dej#Z(e(Z)dS )    )IterableOptionalN)nn)	PhiConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)QuantizationConfig)RadixAttention)get_rope)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixmake_layersc                	       Z   e Zd Z			ddedee dedef fdd	Zd
e	j
dede	j
de	j
fddZ  ZS )PhiAttentionN r   configquant_configprefixlayer_idc           
   
      s   t    |j| _|j| _| j| j | _t }| j| dks J | j| | _t| j| j| jd|d| _	t
| j| j|d| _| jd }t|j|j|j  }|d dksUJ t|dd}t|d	d
}	t| j||	|d| _t| j| j|| j||td|d| _d S )Nr   Tbiasr   r   g         
rope_thetag     @max_position_embeddingsi   )
rotary_dimmax_positionbaseattn)num_kv_headsr   r   r   )super__init__num_attention_headstotal_num_headshidden_size	head_sizer   	num_headsr
   qkv_projr   denseintpartial_rotary_factorgetattrr   
rotary_embr   r   r'   )
selfr   r   r   r    tensor_model_parallel_world_sizescalingr$   r"   r#   	__class__ I/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/phi.pyr*      sV   


zPhiAttention.__init__position_idsforward_batchhidden_statesreturnc                 C   sX   |  |\}}|jddd\}}}| |||\}}| j||||d}	| |	\}
}|
S )N   )chunksdim)r>   )r0   chunkr5   r'   r1   )r6   r=   r>   r?   qkv_qkvattn_outputoutputr;   r;   r<   forwardT   s   zPhiAttention.forwardNr   r   __name__
__module____qualname__r   r   r   strr2   r*   torchTensorr   rM   __classcell__r;   r;   r9   r<   r      s,    6r   c                       s6   e Zd Z	ddedee f fddZdd Z  ZS )	PhiMLPNr   r   c                    s`   t    t|dd }|d ur|nd|j }t|j||d| _t||j|d| _t|j	| _
d S )Nn_inner   r    )r)   r*   r4   r-   r	   fc1r   fc2r   
hidden_actact)r6   r   r   rX   r9   r;   r<   r*   d   s   
zPhiMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S N)rZ   r]   r[   )r6   r?   rG   r;   r;   r<   rM   x   s   
zPhiMLP.forwardr^   )	rP   rQ   rR   r   r   r   r*   rM   rV   r;   r;   r9   r<   rW   b   s    rW   c                	       r   )PhiLayerNr   r   r   r   r   idxc                    sF   t    tj|j|jd| _t||td||d| _	t
||| _d S )Neps	self_attn)r   r   )r)   r*   r   	LayerNormr-   layer_norm_epsinput_layernormr   r   rc   rW   mlp)r6   r   r   r   r`   r9   r;   r<   r*      s   
zPhiLayer.__init__r=   r>   r?   r@   c                 C   s8   |}|  |}| j|||d}| |}|| | }|S )N)r=   r?   r>   )rf   rc   rg   )r6   r=   r>   r?   residualattn_outputsfeed_forward_hidden_statesr;   r;   r<   rM      s   

zPhiLayer.forwardrN   rO   r;   r;   r9   r<   r_      s,    r_   c                       sx   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
	ddej	dedej	deej	 d	ej	f
ddZ  ZS )PhiModelNr   r   r   r   c                    s   t     | _t j j| _t }|j}|j	}| j
 | | _|d  j
 | | _t j
 fddtd|d| _tj j jd| _d S )N   c                    s   t  || dS )N)r   r   r`   )r_   )r`   r   r   r   r;   r<   <lambda>   s    z#PhiModel.__init__.<locals>.<lambda>layers)r   ra   )r)   r*   r   r   
vocab_sizer-   embed_tokensr   
world_sizeranknum_hidden_layersstart_layer	end_layerr   r   ro   r   rd   re   final_layernorm)r6   r   r   r   pp_grouppp_sizepp_rankr9   rm   r<   r*      s$   
zPhiModel.__init__	input_idsr@   c                 C   s
   |  |S r^   )rq   r6   r{   r;   r;   r<   get_input_embeddings   s   
zPhiModel.get_input_embeddingsr>   	positionsinputs_embedsc                 C   sR   |d ur|}n|  |}t| j| jD ]}| j| }||||d}q| |}|S )N)r=   r>   r?   )r}   rangeru   rv   ro   rw   )r6   r{   r>   r~   r   r?   ilayerr;   r;   r<   rM      s   


zPhiModel.forwardNr   r^   )rP   rQ   rR   r   r   r   rS   r*   rT   rU   r}   r   rM   rV   r;   r;   r9   r<   rk      s.    rk   c                       s   e Zd Zdg diZ		ddedee def fdd	Zd
e	j
de	j
fddZ	dd
e	j
de	j
dedee	j
 def
ddZdeeee	j
f  fddZ  ZS )PhiForCausalLMr0   )q_projk_projv_projNr   r   r   r   c                    sP   t    || _|| _t||td|d| _t|j|j	d|d| _
t|| _d S )Nmodel)r   r   r   Tr   )r)   r*   r   r   rk   r   r   r   rp   r-   lm_headr   logits_processor)r6   r   r   r   r9   r;   r<   r*      s   
zPhiForCausalLM.__init__r{   r@   c                 C   s   | j |S r^   )r   r}   r|   r;   r;   r<   r}     s   z#PhiForCausalLM.get_input_embeddingsr~   r>   r   c                 C   s$   | j ||||d}| ||| j|S )N)r{   r>   r~   r   )r   r   r   )r6   r{   r~   r>   r   r?   r;   r;   r<   rM     s   
zPhiForCausalLM.forwardweightsc                 C   s   t |  }t |}t }| D ]h\}}||v rqd}| j D ];\}}||vr*q!t|dt}	|D ]"}
|||
}||v rT|| }|
dd }|	||| |	| q2|	| d} |r`q||vreq|| }t|dt}	|	|| |	| qd S )NFweight_loaderrG   r   T)
dictnamed_parameterssetitemspacked_modules_mappingr4   r   replacesplitadd)r6   r   params_dictloaded_keysnameparam	is_packedpacked_name	src_namesr   src_namefull_src_nameloaded_weightshard_idr;   r;   r<   load_weights  s>   


zPhiForCausalLM.load_weightsr   r^   )rP   rQ   rR   r   r   r   r   rS   r*   rT   rU   r}   r   r   rM   r   tupler   rV   r;   r;   r9   r<   r      s4    
$r   )*typingr   r   rT   r   transformersr   sglang.srt.distributedr   r   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   Moduler   rW   r_   rk   r   
EntryClassr;   r;   r;   r<   <module>   s*   F';`