o
    پi+                     @   sB  d dl mZ d dlmZ d dlZd dlmZ d dlmZ d dlm	Z	m
Z
 d dlmZ d dlmZmZmZ d d	lmZmZ d d
lmZ d dlmZ d dlmZ d dlmZ d dlmZmZ d dlm Z  d dl!m"Z" d dl#m$Z$m%Z% G dd dej&Z'G dd dej&Z(G dd dej&Z)G dd dej&Z*G dd dej&Z+e+Z,dS )    )Iterable)OptionalN)nn)PersimmonConfig)get_pp_group$get_tensor_model_parallel_world_size)
get_act_fn)ColumnParallelLinearQKVParallelLinearRowParallelLinear)LogitsProcessorLogitsProcessorOutput)QuantizationConfig)RadixAttention)get_rope)PPMissingLayer)ParallelLMHeadVocabParallelEmbedding)ForwardBatch)default_weight_loader)
add_prefixmake_layersc                       s>   e Zd Z	d	dedee f fddZdejfddZ	  Z
S )
PersimmonMLPNconfigquant_configc                    sB   t    t|j|j|d| _t|j|j|d| _t|j	| _
d S )Nr   )super__init__r	   hidden_sizeintermediate_sizedense_h_to_4hr   dense_4h_to_hr   
hidden_actact)selfr   r   	__class__ O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/persimmon.pyr      s   


zPersimmonMLP.__init__returnc                 C   s*   |  |\}}| |}| |\}}|S N)r    r#   r!   )r$   hidden_states_r'   r'   r(   forward+   s   
zPersimmonMLP.forwardr*   )__name__
__module____qualname__r   r   r   r   torchTensorr-   __classcell__r'   r'   r%   r(   r      s    r   c                	       s   e Zd Z			ddedee dedef fdd	Zd
e	j
de	j
fddZd
e	j
de	j
fddZde	j
dede	j
de	j
fddZ  ZS )PersimmonAttentionN r   r   r   prefixlayer_idc              
      s:  t    || _t }|j| _|j| _| j| | _| j| j | _|j	| _	|j
| _
|j| _d| _| j| j | jks:J | j| dksCJ t| j| j| jd|d| _t| j| j | jd|d| _|j| _| jrst| j| _t| j| _t| j| j| j	| j
| jd| _| jd | _t| j| j| j| j||td|d| _d S )NTr   biasr   )
rotary_dimmax_positionbasepartial_rotary_factorg      attn)num_kv_headsr7   r   r6   )r   r   r   r   r   num_attention_headstotal_num_heads	num_headshead_dimmax_position_embeddings
rope_thetar=   	is_causalr
   query_key_valuer   denseqk_layernormis_qk_layernormr   	LayerNormq_layernormk_layernormr   
rotary_embscalingr   r   r>   )r$   r   r   r6   r7   tensor_parallel_world_sizer%   r'   r(   r   4   s^   

zPersimmonAttention.__init__xr)   c                 C   s   |j d }||| j| jS Nr   shapeviewrB   rC   r$   rQ   
seq_lengthr'   r'   r(   _split_headsp   s   
zPersimmonAttention._split_headsc                 C   s   |j d }||| j| j S rR   rS   rV   r'   r'   r(   _merge_headst   s   
zPersimmonAttention._merge_headsposition_idsforward_batchr+   c                 C   s   |  |\}}|jddd\}}}| jr2| |}| |}| |}| |}| |}| |}| |||\}}| j||||d}	| 	|	\}
}|
S )N   )chunksdim)r[   )
rG   chunkrJ   rX   rL   rM   rY   rN   r>   rH   )r$   rZ   r[   r+   qkvr,   qkvattn_outputoutputr'   r'   r(   r-   x   s   





zPersimmonAttention.forwardNr5   r   )r.   r/   r0   r   r   r   strintr   r1   r2   rX   rY   r   r-   r3   r'   r'   r%   r(   r4   2   s0    <r4   c                	       sZ   e Zd Z			ddedee dedef fdd	Zd
e	j
dede	j
de	j
fddZ  ZS )PersimmonDecoderLayerNr5   r   r   r   r6   idxc                    sd   t    |j| _t||td||d| _t||d| _tj	|j|j
d| _tj	|j|j
d| _d S )N	self_attn)r   r   r6   r7   r   eps)r   r   r   r4   r   rl   r   mlpr   rK   layer_norm_epsinput_layernormpost_attention_layernorm)r$   r   r   r6   rk   r%   r'   r(   r      s   
zPersimmonDecoderLayer.__init__rZ   r[   r+   r)   c                 C   sN   |}|  |}| j|||d}|| }|}| |}| |}|| }|}|S )N)rZ   r+   r[   )rq   rl   rr   ro   )r$   rZ   r[   r+   residualoutputsr'   r'   r(   r-      s   


zPersimmonDecoderLayer.forwardrg   )r.   r/   r0   r   r   r   rh   ri   r   r1   r2   r   r-   r3   r'   r'   r%   r(   rj      s,    rj   c                       sx   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
	ddej	dedej	deej	 d	ej	f
ddZ  ZS )PersimmonModelNr5   r   r   r6   c                    s   t     | _t | _| jjrt j j| _	nt
 | _	t j fddd| jj| jjd\| _| _| _| jjrFtj j jd| _d S t
 | _d S )Nc                    s   t  || dS )N)r   r6   rk   )rj   )rk   r6   r   r   r'   r(   <lambda>   s    z)PersimmonModel.__init__.<locals>.<lambda>zmodel.layers)r6   pp_rankpp_sizerm   )r   r   r   r   pp_groupis_first_rankr   
vocab_sizer   embed_tokensr   r   num_hidden_layersrank_in_group
world_sizelayersstart_layer	end_layeris_last_rankr   rK   rp   final_layernormr$   r   r   r6   r%   rv   r(   r      s(   

zPersimmonModel.__init__	input_idsr)   c                 C   s
   |  |S r*   )r}   r$   r   r'   r'   r(   get_input_embeddings   s   
z#PersimmonModel.get_input_embeddingsr[   	positionsinputs_embedsc                 C   s^   | j jr|d ur|}n	| |}n|j}t| j| jD ]}| j| }||||d}q| |S )N)rZ   r[   r+   )	rz   r{   r   pp_input_hiddenranger   r   r   r   )r$   r   r[   r   r   r+   ilayerr'   r'   r(   r-      s   

zPersimmonModel.forwardNr5   r*   )r.   r/   r0   r   r   r   rh   r   r1   r2   r   r   r-   r3   r'   r'   r%   r(   ru      s.    "ru   c                       s   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
	ddej	dej	dedeej	 d	ef
ddZdeeeej	f  fddZ  ZS )PersimmonForCausalLMNr5   r   r   r6   c                    sP   t    || _|| _t||td|d| _t|j|j	d|d| _
t|| _d S )Nmodel)r   r   r6   Fr8   )r   r   r   r   ru   r   r   r   r|   r   lm_headr   logits_processorr   r%   r'   r(   r     s   
zPersimmonForCausalLM.__init__r   r)   c                 C   s   | j |S r*   )r   r   r   r'   r'   r(   r     s   z)PersimmonForCausalLM.get_input_embeddingsr   r[   r   c                 C   s$   | j ||||d}| ||| j|S )N)r   r[   r   r   )r   r   r   )r$   r   r   r[   r   r+   r'   r'   r(   r-     s   
zPersimmonForCausalLM.forwardweightsc           
      C   s   t |  }|D ]b\}}d|v rq||vr#|dkrqtd| d q|| }d|v r_t|dd }|d ur_|j}| jj}||d | |ddf ||d	 d   }|||d	 }|	|}t|d
t
}	|	|| qd S )Nzrotary_emb.inv_freqzlm_head.weightzWarning: weight z not found in model.rG   
output_dimr\   r]      weight_loader)dictnamed_parametersprintgetattrrT   r   r@   rU   	transposereshaper   )
r$   r   params_dictnameloaded_weightparamr   loaded_weight_shaperB   r   r'   r'   r(   load_weights/  s6   

z!PersimmonForCausalLM.load_weightsr   r*   )r.   r/   r0   r   r   r   rh   r   r1   r2   r   r   r   r-   r   tupler   r3   r'   r'   r%   r(   r     s0    
$r   )-collections.abcr   typingr   r1   r   transformersr   sglang.srt.distributedr   r   sglang.srt.layers.activationr   sglang.srt.layers.linearr	   r
   r   "sglang.srt.layers.logits_processorr   r   sglang.srt.layers.quantizationr   !sglang.srt.layers.radix_attentionr   "sglang.srt.layers.rotary_embeddingr   sglang.srt.layers.utilsr   *sglang.srt.layers.vocab_parallel_embeddingr   r   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.utilsr   r   Moduler   r4   rj   ru   r   
EntryClassr'   r'   r'   r(   <module>   s.    _4?F