o
    پi#                     @   s
  d dl Z d dlZd dlmZmZmZmZmZ d dlZd dl	m
Z
 d dlmZmZ d dlmZ d dlmZ d dlmZ d dlmZ d d	lmZ d d
lmZmZ d dlmZmZ d dlmZ e e!Z"G dd deZ#G dd deZ$G dd deZ%G dd deZ&e&Z'dS )    N)AnyDictIterableOptionalTuple)PretrainedConfig)get_attention_tp_rankget_attention_tp_size)TopK)QuantizationConfig)get_rope)ForwardBatch)default_weight_loader)Qwen3MoeAttentionQwen3MoeDecoderLayer)Qwen3MoeLLMModel"Qwen3VLMoeForConditionalGeneration)
add_prefixc                       sn   e Zd Z				ddedededed	ed
eeeef  deddf fddZ	de
jde
jdefddZ  ZS )InternS1ProTextAttentionr   @B N   hidden_size	num_headsnum_kv_headslayer_id
rope_thetarope_scalingmax_position_embeddingsreturnc                    s   t  j|||f|| |d| h d}	t fdd|	D }
|
r+d d< | j d< t| j| j|| d| _d	| _d	| _d	| _	d S )
N)r   r   r   r   >   num_inv_freqfope_sep_headfope_init_factorc                 3   s    | ]
}  |d uV  qd S N)get).0keyr    Q/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/interns1pro.py	<genexpr>0   s    z4InternS1ProTextAttention.__init__.<locals>.<genexpr>Tuse_foper   )
rotary_dimmax_positionbaser   F)
super__init__anyr   r   head_dim
rotary_embcompatible_with_fused_kv_bufferuse_fused_qk_norm_rope"_used_fused_qk_norm_rope_last_call)selfr   r   r   r   r   r   r   kwargs	fope_keysr*   	__class__r&   r(   r/      s6   

z!InternS1ProTextAttention.__init__	positionshidden_statesforward_batchc                 C   s   t  r"   )NotImplementedError)r6   r;   r<   r=   r'   r'   r(   forward_prepare_npu@   s   z,InternS1ProTextAttention.forward_prepare_npu)r   r   Nr   )__name__
__module____qualname__intfloatr   r   strr   r/   torchTensorr   r?   __classcell__r'   r'   r9   r(   r      s:    
'r   c                       s   e Zd Z			ddededee dedeej	j
 ddf fd	d
ZeejdededefddZdejdejdededejf
ddZ  ZS )InternS1ProTextDecoderLayerN configr   quant_configprefix
alt_streamr   c                    s   t  j|||||d t|dd}t|dd }t|dd}t|d|j|j }	|j}
|j}t| j|j|j|||||	|
|||t	d||d	| _
t|d
d| _| jdkru|j| j dkseJ |j d| j t|j|jd|| jd| j_d S d S )N)rL   rM   rN   r   r   r   r   r   r1   	self_attn)r   r   r   r   r   r   r   r1   rms_norm_epsattention_biasrK   rL   rM   rN   router_n_groupsr    cannot be divided by F)top_krenormalizeuse_grouped_topkr   custom_routing_function)r.   r/   getattrr   num_attention_headsrP   rQ   r   num_key_value_headsr   rO   rR   num_experts_per_tokr
   norm_topk_prob_custom_routing_functionmlptopk)r6   rK   r   rL   rM   rN   r   r   r   r1   rP   rQ   r9   r'   r(   r/   J   sX   
z$InternS1ProTextDecoderLayer.__init__rR   
group_sizedevicec                 C   s    t j| |d| ddd}|S )N)rb      rS   )rF   arangeview)rR   ra   rb   group_offsetsr'   r'   r(   get_group_offsets   s   z-InternS1ProTextDecoderLayer.get_group_offsetsr<   gating_outputr`   rV   c                 C   s   t j|dt jd}| jdkr]|jd | j dks%J |jd  d| j || j }|jd | j }| | j||j}|d| j|f}t j||dd\}	}
|
| 	dd}
|		dd}	n
t j||dd\}	}
|rr|	|	j
ddd }	|	|
fS )	zGroup routerrS   )dimdtyper   rT   ri   T)ri   keepdim)rF   softmaxfloat32rR   shaperg   rb   	unflattenr`   flattensum)r6   r<   rh   r`   rV   routing_weightsper_group_top_kra   rf   topk_weightstopk_idsr'   r'   r(   r^      s,   




z4InternS1ProTextDecoderLayer._custom_routing_function)NrJ   N)r@   rA   rB   r   rC   r   r   rE   rF   cudaStreamr/   staticmethod	functools	lru_cacherg   rG   boolr^   rH   r'   r'   r9   r(   rI   I   s>    
7rI   c                       s8   e Zd Zdedddedee def fddZ  Z	S )	InternS1ProTextModelNrJ   )rL   decoder_layer_typerM   rK   rL   rM   c                   s   t  j||||d d S )N)rK   rL   rM   r   )r.   r/   )r6   rK   rL   r   rM   r9   r'   r(   r/      s   
zInternS1ProTextModel.__init__)
r@   rA   rB   rI   r   r   r   rE   r/   rH   r'   r'   r9   r(   r~      s    r~   c                	       sn   e Zd Zddefdedee deddf fddZd	ed
e	j
fddZdeeee	j
f  f fddZ  ZS )#InternS1ProForConditionalGenerationNrJ   rK   rL   rM   r   c                    sF   t |jds
g |j_t j||||d t|jjdkr!i | _d S d S )Ndeepstack_visual_indexes)rL   rM   language_model_clsr   )hasattrvision_configr   r.   r/   lenuse_deepstack)r6   rK   rL   rM   r   r9   r'   r(   r/      s   
z,InternS1ProForConditionalGeneration.__init__nameloaded_weightc                 C   s~   t  }t }|d}||k r|| }|}|| }|j|dd| }|dd}||v s.J || }	t|	dt}
|
|	| dS )zload fope weightsr   rk   z.rotary_emb.z.layers.0.self_attn.rotary_emb.weight_loaderN)r	   r   sizechunkreplacerY   r   )r6   r   r   params_dictattn_tp_sizeattn_tp_rankr[   n_replicate
param_nameparamr   r'   r'   r(   _load_fope_weights   s   
z6InternS1ProForConditionalGeneration._load_fope_weightsweightsc                    sz   t | dst|  | _| j}t }|D ]\}}d|v s d|v r.|dd}| ||| q|||< qt |  dS )zload weights_cached_params_dictsin_coefcos_coefzmodel.language_model.zmodel.N)	r   dictnamed_parametersr   r   r   r.   load_weightsitems)r6   r   r   other_weightsr   r   r9   r'   r(   r      s   

z0InternS1ProForConditionalGeneration.load_weights)r@   rA   rB   r~   r   r   r   rE   r/   rF   rG   r   r   r   r   rH   r'   r'   r9   r(   r      s    (r   )(r{   loggingtypingr   r   r   r   r   rF   transformersr   sglang.srt.layers.dp_attentionr   r	   sglang.srt.layers.moe.topkr
   *sglang.srt.layers.quantization.base_configr   "sglang.srt.layers.rotary_embeddingr   ,sglang.srt.model_executor.forward_batch_infor   $sglang.srt.model_loader.weight_utilsr   sglang.srt.models.qwen3_moer   r   sglang.srt.models.qwen3_vl_moer   r   sglang.srt.utilsr   	getLoggerr@   loggerr   rI   r~   r   
EntryClassr'   r'   r'   r(   <module>   s(    
1e=