o
    
۾ik_                     @   s  d Z ddlZddlmZ ddlmZ ddlZddlmZ ddlm	Z	m
Z
 ddlmZmZ ddlmZmZmZ dd	lmZ dd
lmZ ddlmZ ddlmZ ddlmZ ddlmZmZmZm Z  ddl!m"Z" ddl#m$Z$ ddl%m&Z& ddl'm(Z( ddl)m*Z* ddl+m,Z, ddl-m.Z. ddl/m0Z0 ddl1m2Z2m3Z3m4Z4m5Z5m6Z6 ddl7m8Z8 ddl9m:Z:m;Z;m<Z<m=Z= ee>Z?G dd de6Z@G dd dejAZBG dd  d ejAZCG d!d" d"ejAZDG d#d$ d$ejAZEG d%d& d&e8ZFG d'd( d(e0ZGG d)d* d*e.ZHe,jIe5e@e3d+G d,d- d-e4eHZJdS ).zEInference-only InternS1Pro model compatible with HuggingFace weights.    N)Iterable)Any)nn)AutoProcessorPretrainedConfig)CacheConfig
VllmConfig)get_ep_group$get_tensor_model_parallel_world_size tensor_model_parallel_all_gather)init_logger)
SiluAndMul)	Attention)FusedMoE)RMSNorm)MergedColumnParallelLinearQKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)QuantizationConfig)get_rope)ParallelLMHead)sequence_parallel_chunk)MULTIMODAL_REGISTRY   )MixtureOfExperts)Qwen3MoeForCausalLM)Qwen3_VisionTransformerQwen3VLDummyInputsBuilderQwen3VLForConditionalGenerationQwen3VLMultiModalProcessorQwen3VLProcessingInfo)Qwen3MoeLLMModel)AutoWeightsLoaderWeightsMapperextract_layer_indexmaybe_prefixc                   @   s&   e Zd Zdd ZdedefddZdS )InternS1ProProcessingInfoc                 C   s
   | j  S N)ctxget_hf_config)self r-   [/home/ubuntu/.local/lib/python3.10/site-packages/vllm/model_executor/models/interns1_pro.pyr+   T   s   
z'InternS1ProProcessingInfo.get_hf_configkwargsreturnc                 K   s   t j| jjjfddi|S )Ntrust_remote_codeT)r   from_pretrainedr*   model_configmodel)r,   r/   r-   r-   r.   get_hf_processorW   s   z*InternS1ProProcessingInfo.get_hf_processorN)__name__
__module____qualname__r+   objectr   r5   r-   r-   r-   r.   r(   S   s    r(   c                       sN   e Zd Z			ddededededB ded	ed
df fddZdd Z  Z	S )InternS1ProMoeMLPNT hidden_sizeintermediate_size
hidden_actquant_configreduce_resultsprefixr0   c                    sj   t    t||gd d|| dd| _t||d||| dd| _|dkr/td| d	t | _d S )
N   Fz.gate_up_projbiasr?   rA   z
.down_proj)rD   r?   r@   rA   siluzUnsupported activation: z!. Only silu is supported for now.)	super__init__r   gate_up_projr   	down_proj
ValueErrorr   act_fn)r,   r<   r=   r>   r?   r@   rA   	__class__r-   r.   rG   `   s*   
	
zInternS1ProMoeMLP.__init__c                 C   s*   |  |\}}| |}| |\}}|S r)   )rH   rK   rI   )r,   xgate_up_r-   r-   r.   forward   s   
zInternS1ProMoeMLP.forward)NTr;   )
r6   r7   r8   intstrr   boolrG   rQ   __classcell__r-   r-   rL   r.   r:   _   s(    r:   c                
       s   e Zd Z	ddedef fddZeejde	de	defd	d
Z
dejdejde	dedejf
ddZdejdejfddZ  ZS )InternS1ProMoeSparseMoeBlockr;   vllm_configrA   c                    s<  t    |jj}|j}|j}t | _t j	| _
t j| _| j
 | _|j| _|j| _| j|jkr>td| j d|j d|jj}|j| _| j| _|j| _| j| j | _| j| j | _| j| j | _| j| j | _t|dd| _t| j|j |j!|j"d|j#|| d| j| j| j| j$d| _%t&|j!|jd	| d
d| _'d S )NzTensor parallel size z' is greater than the number of experts .router_n_groupsTz.experts)num_expertstop_kr<   r=   r@   renormalizer?   rA   enable_eplbnum_redundant_expertsis_sequence_parallelcustom_routing_functionFz.gate)rD   rA   )(rF   rG   r3   hf_text_configparallel_configr?   r
   tp_sizer	   device_groupep_grouprank_in_groupep_ranksizeep_sizer[   n_routed_expertsuse_sequence_parallel_moer`   rJ   eplb_configr^   n_logical_expertsr_   n_redundant_expertsn_physical_expertsn_local_physical_expertsphysical_expert_startphysical_expert_endgetattrn_groupsr   num_experts_per_tokr<   moe_intermediate_sizenorm_topk_prob_custom_routing_functionexpertsr   gate)r,   rW   rA   configrc   r?   rm   rL   r-   r.   rG      s\   




z%InternS1ProMoeSparseMoeBlock.__init__ru   
group_sizedevicec                 C   s    t j| |d| ddd}|S )N)r~   r   rZ   )torcharangeview)ru   r}   r~   group_offsetsr-   r-   r.   get_group_offsets   s   z.InternS1ProMoeSparseMoeBlock.get_group_offsetshidden_statesgating_outputtopkr]   r0   c                 C   s   t j|dt jd}| jdkr]|jd | j dks%J |jd  d| j || j }|jd | j }| | j||j}|d| j|f}t j||dd\}	}
|
| 	dd}
|		dd}	n
t j||dd\}	}
|rr|	|	j
ddd }	|	|
fS )	NrZ   )dimdtyper   z cannot be divided by r   T)r   keepdim)r   softmaxfloat32ru   shaper   r~   	unflattenr   flattensum)r,   r   r   r   r]   routing_weightsper_group_top_kr}   r   topk_weightstopk_idsr-   r-   r.   ry      s(   



z5InternS1ProMoeSparseMoeBlock._custom_routing_functionc                 C   s   |  dks
J d|  dk}|j\}}|d|}| jr"t|}| |\}}| j||d}| jr>t|d}|d | }|rE|dS |S )NrB   z:InternS1ProMoeSparseMoeBlock only supports 1D or 2D inputsr   rZ   )r   router_logitsr   )	r   r   r   r`   r   r{   rz   r   squeeze)r,   r   is_input_1d
num_tokens
hidden_dimr   rP   final_hidden_statesr-   r-   r.   rQ      s$   
z$InternS1ProMoeSparseMoeBlock.forwardr;   )r6   r7   r8   r   rS   rG   staticmethod	functools	lru_cacherR   r   r   TensorrT   ry   rQ   rU   r-   r-   rL   r.   rV      s,    A
 rV   c                       s   e Zd Z								ddededed	eeef d
ededB dedede	dB de
dB dedeeef dB ddf fddZdejdejdejfddZ  ZS )InternS1ProMoeAttention   Nư>Fr;   r<   	num_headsnum_kv_headsrope_parametersmax_position_embeddingshead_dimrms_norm_epsqkv_biascache_configr?   rA   dual_chunk_attention_configr0   c              	      s  t    || _t }|| _| j| dksJ | j| | _|| _| j|kr/| j| dks.J n	|| j dks8J td| j| | _|pG|| j | _	| j| j	 | _
| j| j	 | _| j	d | _|| _|| _t|| j	| j| j||
| dd| _t| j| j	 |d|
| dd| _| j|d< t| j	|||d	| _t| j| j	| jf| j|	|
| d
d|rt||dni | _t| j	|d| _t| j	|d| _d S )Nr   r   g      z	.qkv_projrC   Fz.o_projnum_key_value_heads)max_positionr   r   z.attn)r   r   r?   rA   )	layer_idxr   eps)rF   rG   r<   r
   total_num_headsr   total_num_kv_headsmaxr   r   q_sizekv_sizescalingr   r   r   qkv_projr   o_projr   
rotary_embr   r&   attnr   q_normk_norm)r,   r<   r   r   r   r   r   r   r   r   r?   rA   r   rd   rL   r-   r.   rG     sv   




z InternS1ProMoeAttention.__init__	positionsr   c                 C   s   |  |\}}|j| j| j| jgdd\}}}|jg |jd d |jd | j | jR  }| |}||j}|jg |jd d |jd | j | jR  }	| |	}	|	|j}| j	
|||\}}| |||}
| |
\}}|S )NrZ   r   )r   splitr   r   r   r   r   r   r   r   forward_nativer   r   )r,   r   r   qkvrP   qkv	q_by_head	k_by_headattn_outputoutputr-   r-   r.   rQ   `  s    0
0
zInternS1ProMoeAttention.forward)r   Nr   FNNr;   N)r6   r7   r8   rR   dictrS   r   floatrT   r   r   rG   r   r   rQ   rU   r-   r-   rL   r.   r     sV    
	
Sr   c                
       s\   e Zd Zddededdf fddZdejd	ejd
ejdB deejejf fddZ	  Z
S )InternS1ProMoeDecoderLayerr;   rW   rA   r0   Nc                    s  t    |jj}|j}|j}|j| _t|dd}t|dd }|j h d}t	 fdd|D }	 
dd }
 
dd } 
d	d }|	|jd
< |
|jd< ||jd< ||jd	< |	s]J dt| j|j|j|j||jt|ddt|dd ||| d|d| _t|}t|dsg n|j}||vr|jdkr|d |j dkrt|| dd| _nt|j|j|j|| dd| _t|j|jd| _t|j|jd| _d S )Nr   r   r   >   num_inv_freqfope_sep_headfope_init_factorc                 3   s    | ]
}  |d uV  qd S r)   )get).0keyrope_scalingr-   r.   	<genexpr>  s    z6InternS1ProMoeDecoderLayer.__init__.<locals>.<genexpr>r   r   r   use_fopez%should use FOPE for InternS1Pro modelattention_biasFr   z
.self_attn)r<   r   r   r   r   r   r   r   r   r?   rA   r   mlp_only_layersr   r   z.mlprW   rA   )r<   r=   r>   r?   rA   r   )rF   rG   r3   rb   r   r?   r<   rt   r   anyr   r   r   num_attention_headsr   r   	self_attnr&   hasattrr   r[   decoder_sparse_steprV   mlpr:   r=   r>   r   input_layernormpost_attention_layernorm)r,   rW   rA   r|   r   r?   r   r   	fope_keysr   r   r   r   r   r   rL   r   r.   rG   v  sj   








z#InternS1ProMoeDecoderLayer.__init__r   r   residualc                 C   sX   |d u r|}|  |}n|  ||\}}| j||d}| ||\}}| |}||fS )N)r   r   )r   r   r   r   )r,   r   r   r   r-   r-   r.   rQ     s   
z"InternS1ProMoeDecoderLayer.forwardr   )r6   r7   r8   r   rS   rG   r   r   tuplerQ   rU   r-   r-   rL   r.   r   u  s    Br   c                       s:   e Zd Zdeddededeejj	 f fddZ
  ZS )InternS1ProMoeLLMModelr;   )rA   decoder_layer_typerW   rA   r   c                   s   t  j|||d d S )N)rW   rA   r   )rF   rG   )r,   rW   rA   r   rL   r-   r.   rG     s
   
zInternS1ProMoeLLMModel.__init__)r6   r7   r8   r   r   rS   typer   r   ModulerG   rU   r-   r-   rL   r.   r     s    
r   c                       s,   e Zd Zdddedef fddZ  ZS )InternS1ProMoeLLMForCausalLMr;   rA   rW   rA   c                   s   t t|   |jjj| _|j| _t|t	|dd| _
t| jj| jj| jt	|dd| _| jjr7| j
jj| j_t| jj| _| j
j| _d S )Nr4   r   lm_head)r?   rA   )rF   r   rG   r3   	hf_configtext_configr|   r?   r   r'   r4   r   
vocab_sizer<   r   tie_word_embeddingsembed_tokensweightr   logits_processormake_empty_intermediate_tensors)r,   rW   rA   rL   r-   r.   rG     s"   
z%InternS1ProMoeLLMForCausalLM.__init__)r6   r7   r8   r   rS   rG   rU   r-   r-   rL   r.   r     s    $r   c                   @   s*   e Zd ZdededdfddZdd ZdS )	Qwen3VLMoeMixtureOfExpertsnum_physical_expertsnum_local_physical_expertsr0   Nc                 C   sj   | j |ksJ || _|| _ || j | _| jjjD ]}t|jt	r2|j}||_
||_| j|_|j  qd S r)   )r   r   num_logical_expertsr_   language_modelr4   layers
isinstancer   rV   rq   rp   ro   rz   update_expert_map)r,   r   r   layermoer-   r-   r.    update_physical_experts_metadata  s   
z;Qwen3VLMoeMixtureOfExperts.update_physical_experts_metadatac                 C   s   g | _ g | _d }| jjjD ]}t|dr%t|jtr%|j}| j	|jj
 q|d u r.tdt| j| _d| _d| _|j| _|j| _|j| _|j| _|j| _d S )Nr   z4No InternS1ProMoe layer found in the language_model.r   r   )expert_weights
moe_layersr   r4   r   r   r   r   rV   appendrz   RuntimeErrorlennum_moe_layersnum_expert_groupsnum_shared_expertsrn   r   rp   r   rq   r   rk   num_routed_expertsro   r_   )r,   example_moer   r-   r-   r.   set_moe_parameters  s(   z-Qwen3VLMoeMixtureOfExperts.set_moe_parameters)r6   r7   r8   rR   r   r  r-   r-   r-   r.   r     s    
r   )infodummy_inputsc                       s   e Zd ZU dZeed< dg diZeddddd	Zd
dde	de
f fddZde
fddZdeee
ejf  fddZ  ZS )#InternS1ProForConditionalGenerationTis_3d_moe_weightr   )q_projk_projv_projvisual.language_model.lm_head.language_model.model.zmodel.visual.zlm_head.zmodel.language_model.)orig_to_new_prefixr;   r   rW   rA   c                   s   t t|   |jj}|jj}|| _|| _|jdk| _|j	| _	|
 | _
|ds2|ds2d | _nt|jt|ddt|dd| _t|t|dd	| _| j| jjB | _| jj| _t|jd
| _| jrit|jjnd| _|jj| _| j| j | _|   d S )Ndataimagevideor   r   visual)norm_epsrA   r   r   deepstack_visual_indexesr   )rF   r    rG   r3   r   multimodal_configr|   mm_encoder_tp_modeuse_data_parallelvideo_pruning_rateis_multimodal_pruning_enabledget_limit_per_promptr  r   vision_configrt   r'   r   r   packed_modules_mappingr   r   use_deepstackr  r  deepstack_num_levelout_hidden_size
visual_dimmultiscale_dimr  )r,   rW   rA   r|   r  rL   r-   r.   rG   :  sF   


z,InternS1ProForConditionalGeneration.__init__r0   c                 C   sJ   i }| j j D ]\}}d|v rd| |d< d|v r"d| |d< q|S )Nzrotary_emb.sin_coefr  z(language_model.model.rotary_emb.sin_coefzrotary_emb.cos_coefz(language_model.model.rotary_emb.cos_coef)r   r4   named_parameters)r,   mappernameparamsr-   r-   r.   get_frope_params_mapk  s   z8InternS1ProForConditionalGeneration.get_frope_params_mapweightsc                 C   sL   dg}| j du r|d tdddd|  d}t| |d}|j||d	S )
zload weightszmodel.time_series.Nr  r  r  r  )r  orig_to_new_suffix)skip_prefixes)r)  )r  r   r%   r,  r$   load_weights)r,   r-  r/  weights_mapperloaderr-   r-   r.   r0  x  s   

z0InternS1ProForConditionalGeneration.load_weights)r6   r7   r8   r  rT   __annotations__r"  r%   hf_to_vllm_mapperr   rS   rG   r,  r   r   r   r   r0  rU   r-   r-   rL   r.   r     s   
 	1$r  )K__doc__r   collections.abcr   typingr   r   r   transformersr   r   vllm.configr   r   vllm.distributedr	   r
   r   vllm.loggerr   %vllm.model_executor.layers.activationr   $vllm.model_executor.layers.attentionr   $vllm.model_executor.layers.fused_moer   $vllm.model_executor.layers.layernormr   !vllm.model_executor.layers.linearr   r   r   r   +vllm.model_executor.layers.logits_processorr   'vllm.model_executor.layers.quantizationr   +vllm.model_executor.layers.rotary_embeddingr   3vllm.model_executor.layers.vocab_parallel_embeddingr    vllm.model_executor.models.utilsr   vllm.multimodalr   
interfacesr   	qwen3_moer   qwen3_vlr   r   r    r!   r"   qwen3_vl_moer#   utilsr$   r%   r&   r'   r6   loggerr(   r   r:   rV   r   r   r   r   r   register_processorr  r-   r-   r-   r.   <module>   sX   ' iZ,

