o
    پi>                     @   s  d dl Z d dlmZmZmZmZ d dlZd dlmZ d dlm	Z	 d dl
mZ d dlmZmZ d dlmZmZmZ d dlmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlm Z  d dl!m"Z"m#Z#m$Z$ d dl%m&Z& d dl'm(Z( d dl)m*Z*m+Z+ ej,j-dd Z.ej,j-d!dee/ fddZ0G dd dej1Z2G dd dej1Z3G dd dej1Z4G dd dej1Z5G dd  d ej1Z6e6Z7dS )"    N)IterableOptionalTupleUnion)nn)
Phi3Config)PretrainedConfig)get_pp_group$get_tensor_model_parallel_world_size)MergedColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)PoolerPoolingType)QuantizationConfig)RadixAttention)get_rope)PPMissingLayer)DEFAULT_VOCAB_PADDING_SIZEParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixmake_layersc                 C   s   | t d|   S )NgZd;?)torchsigmoid)x r    P/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/phi3_small.py
quick_gelu   s   r"   limitc                 C   s   | dd d df | ddd df }}|d ur6t t |||jd |d}t t |||j| |d}t|}||d  S )N.      )minmax)r   whereisinfclampr"   )inputr#   a_gelua_linearout_gelur    r    r!   gegelu$   s   &r/   c                	       s@   e Zd Z		ddedee deddf fddZd	d
 Z  Z	S )Phi3SmallMLPN configquant_configprefixreturnc                    s   t    || _| jjdksJ d|j| _|j| _|j| _t| jd| jg d|td|d| _	t
| j| jd|td|d| _d S )Nr/   z:Only `gegelu` is supported for the 4.7 series of models ..r$   Tup_projbiasr3   r4   	down_proj)super__init__r2   
hidden_acthidden_sizegegelu_limitintermediate_sizer   r   r6   r   r9   selfr2   r3   r4   	__class__r    r!   r;   6   s,   

zPhi3SmallMLP.__init__c                 C   s(   |  |\}}t|}| |\}}|S N)r6   r/   r9   )rA   r   gate_up_r    r    r!   forwardT   s   zPhi3SmallMLP.forwardNr1   )
__name__
__module____qualname__r   r   r   strr;   rG   __classcell__r    r    rB   r!   r0   4   s    r0   c                       sx   e Zd Z			ddededee deddf
 fd	d
Zde	j
de	j
dedee	j
ee	j
 eee	j
  f fddZ  ZS )Phi3SmallSelfAttentionr   Nr1   r2   layer_idr3   r4   r5   c           
   
      sd  t    || _|| _|j| _|j| _|j| _	|j
| _|j|jks#J |j| _|j| _| j| j | _t | _|j| _| j| j | _| jdkrP| j| j dksPJ td| j| j | _| j| j | _|j| _|j| _|j| _d| _d }|jr|| j|j }nt| j}d| | _ t!| j| j| j| jd|t"d|d| _#t$| j| jd|t"d|d| _%t&| jdd d ur| jj'}|D ]}t(|| t)rt*|| ||< qd|vr| j|d< nd	| jd
}t+| j| j| j| j|d| _,|j| _|j| _|j
| _
t&| jdd o| jd | jj- dk}d }	|s| j| j| j| j| j	| j| jd}	t.| j| j| j | j||t"d|d| _/d S )Nr%   r   Tqkv_projr7   o_projrope_scalingfactorlinear)	rope_typerS   )
rotary_dimmax_positionbaserR   dense_attention_every_n_layers)
max_seqlen	num_headsnum_kv_heads
block_sizelocal_blocksvert_stride	homo_headattn)r\   rO   r3   r4   )0r:   r;   rO   r2   blocksparse_block_sizesparse_block_sizeblocksparse_homo_head_pattern
homo_headsblocksparse_num_local_blocksr^   blocksparse_vert_strider_   $blocksparse_triton_kernel_block_sizer=   num_attention_headsr[   head_dimr
   tp_sizenum_key_value_headsnum_q_per_kvr'   num_kv_heads_per_partionnum_heads_per_partitionmax_position_embeddingsrope_embedding_baserope_position_scale	is_causalmup_use_scalingmup_attn_multipliermathsqrtscaler   r   query_key_valuer   densegetattrrR   
isinstancelisttupler   
rotary_embrY   r   ra   )
rA   r2   rO   r3   r4   norm_factorrR   keyuse_dense_attn	bs_paramsrB   r    r!   r;   ]   s   




	
zPhi3SmallSelfAttention.__init__	positionshidden_statesforward_batchc                 C   s   |  |\}}||jd d d| jd | jf }|j| jddgdd\}}}|d| j| j }|d| j| j }|d| j| j }| 	|||\}}| j
||||d}	| |	\}
}|
S )Nr$   r%   )dimr   )ry   viewshaperm   rj   splitreshapero   rn   r   ra   rz   )rA   r   r   r   qkvrF   qkvattn_outputoutputr    r    r!   rG      s   &zPhi3SmallSelfAttention.forward)r   Nr1   )rI   rJ   rK   r   intr   r   rL   r;   r   Tensorr   r   rG   rM   r    r    rB   r!   rN   [   s0    urN   c                	       sX   e Zd Z		ddededee def fddZd	e	j
d
e	j
dede	j
fddZ  ZS )Phi3SmallDecoderLayerNr1   r2   rO   r3   r4   c                    sl   t    |j| _t|||td|d| _t||td|d| _tj	|j|j
d| _tj	|j|j
d| _d S )N	self_attn)r3   r4   mlpr4   eps)r:   r;   r=   rN   r   r   r0   r   r   	LayerNormlayer_norm_epsiloninput_layernormpost_attention_layernorm)rA   r2   rO   r3   r4   rB   r    r!   r;      s&   
zPhi3SmallDecoderLayer.__init__r   r   r   r5   c                 C   sJ   |}|  |}| j|||d}|| }|}| |}| |}|| }|S )N)r   r   r   )r   r   r   r   )rA   r   r   r   residualr    r    r!   rG     s   


zPhi3SmallDecoderLayer.forwardrH   )rI   rJ   rK   r   r   r   r   rL   r;   r   r   r   rG   rM   r    r    rB   r!   r      s*    r   c                       s|   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
dejdeej dedeej	 d	eej	 f
ddZ  ZS )Phi3SmallModelNr1   r2   r3   r4   c                    s   t     | _t | _| jjrt j jt	d|d| _
nt | _
t j jt	d|d| _
 j| _t j fdd| jj| jjt	d|d\| _| _| _tj j jd| _d S )Nembed_tokensr   c                    s   t  t|dd |dS )N.r   r   )r   r   r   )idxr4   r2   r3   r    r!   <lambda>=  s    z)Phi3SmallModel.__init__.<locals>.<lambda>layers)pp_rankpp_sizer4   r   )r:   r;   r2   r	   pp_groupis_first_rankr   
vocab_sizer=   r   r   r   mup_embedding_multiplierr   num_hidden_layersrank_in_group
world_sizer   start_layer	end_layerr   r   r   final_layernormr@   rB   r   r!   r;   !  s4   

zPhi3SmallModel.__init__	input_idsr5   c                 C   s
   |  |S rD   )r   rA   r   r    r    r!   get_input_embeddingsL     
z#Phi3SmallModel.get_input_embeddingsr   r   inputs_embedsc                 C   sp   |d ur|}n|  |}| jd ur| jdkr|| j }t| j| jD ]}| j| }||||d}q"| |}|S )Ng        r   )r   r   ranger   r   r   r   )rA   r   r   r   r   r   ilayerr    r    r!   rG   O  s   





zPhi3SmallModel.forwardrH   )rI   rJ   rK   r   r   r   rL   r;   r   r   r   
LongTensorr   r   rG   rM   r    r    rB   r!   r     s,    +r   c                       s   e Zd ZdgZ		d$dedee def fddZd	e	j
d
e	j
fddZdd Zdd Zdd Zdd Zdd Zd	e	jde	j
d
ee	j
 fddZ		d%d	e	jdee	j dedee	j
 ded
efdd Zd!eeee	j
f  fd"d#Z  ZS )&Phi3SmallForCausalLMlm_head.weightNr1   r2   r3   r4   c              	      s   t    || _|| _t||td|d| _|j| _|j| _t	| j|j
|jt|td|d| _| jjr:| jjj| j_t|| _ttjdd| _t|drb| jjj}| jdt|j|dd	 d S d | _d S )
Nmodel)r2   r3   r4   lm_head)org_num_embeddingspadding_sizer3   r4   T)pooling_type	normalizedummy_token_indicesF)
persistent)r:   r;   r2   r3   r   r   r   r   mup_width_multiplierr   r=   r   r   tie_word_embeddingsr   weightr   logits_processorr   r   LASTpoolerhasattrdeviceregister_bufferr   r   r   to)rA   r2   r3   r4   r   rB   r    r!   r;   l  s<   





zPhi3SmallForCausalLM.__init__r   r5   c                 C   s   | j |S rD   )r   r   r   r    r    r!   r        z)Phi3SmallForCausalLM.get_input_embeddingsc                 C   s   || j _d S rD   )r   r   rA   valuer    r    r!   set_input_embeddings  r   z)Phi3SmallForCausalLM.set_input_embeddingsc                 C      | j S rD   r   rA   r    r    r!   get_output_embeddings     z*Phi3SmallForCausalLM.get_output_embeddingsc                 C   
   || _ d S rD   r   r   r    r    r!   set_output_embeddings  r   z*Phi3SmallForCausalLM.set_output_embeddingsc                 C   r   rD   r   )rA   decoderr    r    r!   set_decoder  r   z Phi3SmallForCausalLM.set_decoderc                 C   r   rD   r   r   r    r    r!   get_decoder  r   z Phi3SmallForCausalLM.get_decoderr   c                 C   s<   |  || j||}| jd ur|d ur|d| jtj  |S )Nr   )r   r   r   index_fill_r   inf)rA   r   r   sampling_metadatalogitsr    r    r!   compute_logits  s   
z#Phi3SmallForCausalLM.compute_logitsFr   r   r   get_embeddingc                 C   s4   | j ||||d}|s| ||| j|S | ||S )N)r   r   r   r   )r   r   r   r   )rA   r   r   r   r   r   r   r    r    r!   rG     s   
zPhi3SmallForCausalLM.forwardweightsc                 C   sl   t |  }|D ]+\}}d|v rq|dr||vrq| jjr$d|v r$q|| }t|dt}||| qd S )Nzrotary_emb.inv_freqz.biasr   weight_loader)dictnamed_parametersendswithr2   r   r{   r   )rA   r   params_dictnameloaded_weightparamr   r    r    r!   load_weights  s   z!Phi3SmallForCausalLM.load_weightsrH   )NF)rI   rJ   rK   _tied_weights_keysr   r   r   rL   r;   r   r   r   r   r   r   r   r   r   r   r   boolr   rG   r   r   r   rM   r    r    rB   r!   r   i  sP    *

$r   rD   )8rv   typingr   r   r   r   r   r   transformersr    transformers.configuration_utilsr   sglang.srt.distributedr	   r
   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.poolerr   r   *sglang.srt.layers.quantization.base_configr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   jitscriptr"   floatr/   Moduler0   rN   r   r   r   
EntryClassr    r    r    r!   <module>   s:    
' 5Js