o
    پi                     @   s  d dl Zd dlZd dlZd dlZd dlZd dlmZ d dlm	Z	m
Z
mZmZ d dlZd dlmZ d dlmZmZ d dlmZmZ d dlmZ d dlmZmZmZ d d	lmZ d d
lmZ d dlm Z  d dl!m"Z"m#Z# d dl$m%Z%m&Z&m'Z' d dl(m)Z) d dl*m+Z+ d dl,m-Z- e- Z.d dl/m0Z0m1Z1 d dl,m2Z2 e3e4Z5G dd dej6Z7dd Z8G dd dej6Z9dd Z:G dd dej6Z;G dd dej6Z<G dd  d ej6Z=G d!d" d"ej6Z>G d#d$ d$ej6Z?G d%d& d&ej6Z@e@ZAdS )'    N)Iterable)ListOptionalSetTuple)nn)Llama4ConfigLlama4VisionConfig)Llama4MultiModalProjectorvision_apply_rotary_emb)VisionAttention)ColumnParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)FusedMoE)QuantizationConfig)/MultiModalityDataPaddingPatternMultimodalTokensgeneral_mm_embed_routine)ModalityMultimodalDataItemMultimodalInputs)ForwardBatch)get_global_server_args)is_cpu)default_weight_loadermaybe_remap_kv_scale_name)
add_prefixc                       s`   e Zd Z			ddededededed	ee d
edef fddZde	j
de	j
fddZ  ZS )Llama4VisionMLPN F
input_sizeintermediate_sizeoutput_sizebiasoutput_activationquant_configprefixuse_data_parallelc	                    sj   t    |r	tnt}	|	||||| dd| _|rtnt}
|
||||| dd| _t | _	|| _
d S )Nz.fc1r    r"   r#   r%   r&   z.fc2)super__init__r   r   fc1r   fc2r   GELUactivation_fnr$   )selfr    r!   r"   r#   r$   r%   r&   r'   cls_fc1cls_fc2	__class__ M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/mllama4.pyr*   4   s&   


zLlama4VisionMLP.__init__hidden_statesreturnc                 C   s:   |  |\}}| |}| |\}}| jr| |S |S N)r+   r.   r,   r$   r/   r6   _r4   r4   r5   forwardS   s   

zLlama4VisionMLP.forwardNr   F)__name__
__module____qualname__intboolr   r   strr*   torchTensorr;   __classcell__r4   r4   r2   r5   r   2   s,    		r   c           
   	   C   s   | j \}}}tt|}| |||d} |  \}}}}| ||t|| t|| }|dddd }||t|| t|| t||d  }|dddd }||d|j d }	|	S )Nr            )shaper@   mathsqrtviewsizepermute
contiguous)
input_tensorshuffle_ratio
batch_sizenum_patcheschannels
patch_sizeheightwidthreshaped_tensoroutput_tensorr4   r4   r5   pixel_shuffle\   s"   

r[   c                       sL   e Zd Z			ddee dedef fddZd	ej	d
ej	fddZ
  ZS )Llama4VisionPixelShuffleMLPNr   Fr%   r&   r'   c              
      s>   t    |j| _t|j|j|j|jd|| d|d| _d S )NT.mlpr    r!   r"   r#   r$   r%   r&   r'   )	r)   r*   pixel_shuffle_ratior   r!   projector_input_dimprojector_output_dimmulti_modal_projector_biasmlpr/   configr%   r&   r'   r2   r4   r5   r*   w   s   
z$Llama4VisionPixelShuffleMLP.__init__encoded_patchesr7   c                 C   s   t || j}| |S r8   )r[   r_   rc   )r/   rf   r4   r4   r5   r;      s   
z#Llama4VisionPixelShuffleMLP.forwardr<   )r=   r>   r?   r   r   rB   rA   r*   rC   rD   r;   rE   r4   r4   r2   r5   r\   u   s    r\   c                 C   sP   |d d }g || j dd  R }| |} ||}t| ||\} }| |fS )NrG   )rJ   rM   r   )qkfreqs_cirJ   input_shapehidden_shaper4   r4   r5   apply_position_embedding   s   

rm   c                	       sN   e Zd Z		ddedee dedef fddZd	e	j
d
e	j
fddZ  ZS )Llama4VisionEncoderLayerr   Fre   r%   r&   r'   c                    s   t    |j| _|j| _|j| _t| j| j| jdd dtd|dtd	| _t	|j|j|jdd|| d|d| _
t|j| _t|j| _d S )NTF	self_attn)use_qkv_parallelr%   flatten_batchr&   qkv_bias%customized_position_embedding_applierr]   r^   )r)   r*   hidden_sizenum_attention_headsr!   r   r   rm   ro   r   rc   r   	LayerNorminput_layernormpost_attention_layernormrd   r2   r4   r5   r*      s6   
z!Llama4VisionEncoderLayer.__init__hidden_staterj   c                 C   sL   |}|  |}| j||d}|| }|}| |}| |}|| }|}|S )N)position_embeddings)rw   ro   rx   rc   )r/   ry   rj   residualoutputsr4   r4   r5   r;      s   


z Llama4VisionEncoderLayer.forwardr   Fr=   r>   r?   r	   r   r   rB   rA   r*   rC   rD   r;   rE   r4   r4   r2   r5   rn      s"    &rn   c                	       sT   e Zd Z		ddedee dedef fddZd	e	j
d
e	j
de	j
fddZ  ZS )Llama4VisionEncoderr   Fre   r%   r&   r'   c                    s:   t     | _t fddt jD | _d S )Nc                    s&   g | ]}t   d | dqS )z.layers.)r%   r&   r'   )rn   ).0	layer_idxre   r&   r%   r'   r4   r5   
<listcomp>   s    z0Llama4VisionEncoder.__init__.<locals>.<listcomp>)r)   r*   re   r   
ModuleListrangenum_hidden_layerslayersrd   r2   r   r5   r*      s   

zLlama4VisionEncoder.__init__r6   rj   r7   c                 C   s    | j D ]
}|||d}|}q|S )a  
        Args:
            hidden_states (`torch.FloatTensor` of shape
                    `(batch_size, sequence_length, hidden_size)`):
                Optionally, instead of passing `input_ids` you can choose to
                directly pass an embedded representation. This is useful if you
                want more control over how to convert `input_ids` indices into
                associated vectors than the model's internal embedding
                lookup matrix.
        rj   )r   )r/   r6   rj   encoder_layerlayer_outputsr4   r4   r5   r;      s   
zLlama4VisionEncoder.forwardr}   r~   r4   r4   r2   r5   r      s&    r   c                	       sP   e Zd Z			ddedee dedef fdd	Zd
e	j
de	j
fddZ  ZS )Llama4UnfoldConvolutionNr   Fre   r%   r&   r'   c                    s   t    |j}t|tr||f}tjj||jd| _|j	|d  |d  |j
d|| dd}|r5t}nt}d|d< |d	i || _d S )
N)kernel_sizestrider   rH   Fz.linearr(   Tgather_outputr4   )r)   r*   rV   
isinstancer@   rC   r   Unfoldunfoldnum_channelsrt   r   r   linear)r/   re   r%   r&   r'   r   paramsclsr2   r4   r5   r*     s    

z Llama4UnfoldConvolution.__init__r6   r7   c                 C   s.   |  |}|ddd }| |\}}|S )Nr   rG   rH   )r   rO   rP   r   r9   r4   r4   r5   r;   "  s   
zLlama4UnfoldConvolution.forwardr<   r~   r4   r4   r2   r5   r     s    r   c                       s$   e Zd Z fddZdd Z  ZS )Llama4VisionRotaryEmbeddingc                    sd  t    |j|j }tj|d tjd|d d}tj||d d gdd}d|d< || }|| }|j	|j
 d }d|jtd|dd |d   |   }|d d	 |d d d d f  jdd
d}|d d	 |d d d d f  jdd
d}	tj||	gd
d  dd d df }
|
|d
dddk d}
ttjt|
t|
gd
d}|| _d S )NrG   )dtyperH   r   dimrg   )rF   rF   g      ?).NrF   .)r)   r*   
image_sizerV   rC   arangeint32reshapecatrt   ru   
rope_thetafloatrepeat_interleaverP   masked_fillview_as_complexstackcossinrj   )r/   re   idximg_idxfrequencies_xfrequencies_yfreq_dim	rope_freqfreqs_xfreqs_yfreqsfreq_cisr2   r4   r5   r*   *  s2   
  (
z$Llama4VisionRotaryEmbedding.__init__c                 C   s   | j |jS r8   )rj   todevice)r/   r6   r4   r4   r5   r;   D     z#Llama4VisionRotaryEmbedding.forward)r=   r>   r?   r*   r;   rE   r4   r4   r2   r5   r   )  s    r   c                       sJ   e Zd Z		ddedee def fddZdej	d	ej	fd
dZ
  ZS )Llama4VisionModelNr   re   r%   r&   c                    s   t    || _|j| _|j| _|j| _|j| _| j| j d d | _|jd | _t	||| dd| _
t| jt| j | _t| jt| j| j | _t|| _tj| jdd| _tj| jdd| _t||| dd| _t||| d	d
| _d S )NrG   rH   g      z.patch_embeddingr%   r&   gh㈵>)epsz.modelz.vision_adapter)r&   )r)   r*   re   r   rV   rt   r   rT   scaler   patch_embeddingr   	ParameterrC   randnclass_embeddingpositional_embedding_vlmr   rotary_embeddingrv   layernorm_prelayernorm_postr   modelr\   vision_adapter)r/   re   r%   r&   r2   r4   r5   r*   J  s<   

zLlama4VisionModel.__init__pixel_valuesr7   c           	      C   s   |  |}|j\}}}| j|jd d|jd }tj||gdd}|d7 }||d||}| jj|j	|j
d}|| }| |}||d|}| |}| j||d}| |}|d d d dd d f }| |}|S )Nr   rH   rF   r   )r   r   r   )r   rJ   r   expandrC   r   r   r   r   r   r   r   rM   r   r   r   r   )	r/   r   ry   	num_tilesrT   
hidden_dimr   positional_embeddingrj   r4   r4   r5   r;   w  s2   




zLlama4VisionModel.forwardNr   )r=   r>   r?   r	   r   r   rB   r*   rC   rD   r;   rE   r4   r4   r2   r5   r   H  s    -r   c                       s  e Zd Zg dddgdZedZ		dQded	ee	 d
e
f fddZdefddZde
defddZdee defddZdee dejfddZde
defddZdejdejdededejf
d d!Zd"e
d#ejdee
ejf fd$d%Zd&eee
ejf  dee
 fd'd(Zd"e
defd)d*Z d"e
de
fd+d,Z!d"e
d-e"defd.d/Z#d"e
d#ejd0e$d-e"d1e%defd2d3Z&d"e
d#ejd4e$d-e"d5ed1e%defd6d7Z'd"e
d#ejd4e$d-e"d1e%defd8d9Z(	:dRd"e
d;edee
e
ee
 f fd<d=Z)d"e
d#ejd-e"d5ed1e%defd>d?Z*d"e
d#ejd-e"d5ed1e%defd@dAZ+d"e
d#ejd-e"fdBdCZ,dSdDeee  fdEdFZ-dGdH Z.dIdJ Z/dKdL Z0dMdN Z1dOdP Z2  Z3S )TLlama4ForConditionalGeneration)q_projk_projv_proj	gate_projup_proj)qkv_projgate_up_projzd^language_model\.model\.layers\.(\d+)\.(?:self_attn|mlp)\.(?:qkv_proj|o_proj|down_proj|gate_up_proj)Nr   re   r%   r&   c                    s   t    || _|| _| || _| jstd | jot j	| _
| j
rMt|di di }d|v r9d|v r9d }n|}t|j|td|d| _t|| _nd | _d | _dd	lm} |t|d
rb|jn||td|d| _tt|d
ru|jn|| _t | _d S )NzNo vision weights found in checkpoint. Model will run in text-only mode. Multimodal capabilities (vision understanding) will be unavailable. Please not that this warning might be inaccurate if the weights haven't been fully downloadedquantization_configignorezmodel.layers.vision_model*z#model.layers.multi_modal_projector*vision_modelr   r   )Llama4ForCausalLMtext_configlanguage_model)r)   r*   re   r%   _has_vision_weightshas_vision_weightsloggerwarningr   enable_multimodal
has_visiongetattrgetr   vision_configr   r   r
   multi_modal_projectorsglang.srt.models.llama4r   hasattrr   r   r   logits_processorr   padding_pattern)r/   re   r%   r&   ignore_quant_layersvision_quant_configr   r2   r4   r5   r*     sH   
z'Llama4ForConditionalGeneration.__init__r7   c                 C   s   t |dd}|s
dS tj|r"tj|d}tj|r"| |S z!ddlm} ||ddd}|r>tj|rA| |W S W dS W dS  t	yM   Y dS w )zECheck if the model has vision components by examining the checkpoint._name_or_pathNFzmodel.safetensors.index.jsonr   )try_to_load_from_cache)repo_idfilename	cache_dir)
r   ospathisdirjoinexists_check_vision_weights_in_indexhuggingface_hubr   	Exception)r/   re   
model_path
index_filer   index_file_pathr4   r4   r5   r     s0   
z2Llama4ForConditionalGeneration._has_vision_weightsr   c              
      s   z2t |d}t|}W d   n1 sw   Y  g d |di  }t fdd|D W S  ttjtfy@   Y dS w )zBCheck if the model.safetensors.index.json contains vision weights.rN)r   vision_towerr   
weight_mapc                 3   s"    | ]} D ]}||v V  qqd S r8   r4   )r   weight_namepatternvision_patternsr4   r5   	<genexpr>  s    zPLlama4ForConditionalGeneration._check_vision_weights_in_index.<locals>.<genexpr>F)	openjson_libloadr   keysanyOSErrorJSONDecodeErrorKeyError)r/   r   f
index_dataweight_namesr4   r   r5   r   
  s   
z=Llama4ForConditionalGeneration._check_vision_weights_in_index	input_ids	mm_inputsc                 C   s   | j ||S r8   )r   pad_input_tokens)r/   r	  r
  r4   r4   r5   pad_input_ids  r   z,Llama4ForConditionalGeneration.pad_input_idsitemsc                 C   sz   | j r| jd u rtdtdd |D t| j j	t| j j
}| |}|d|d}| |}|S )Nz3Vision model not available for text-only checkpointc                 S   s   g | ]}|j qS r4   )feature)r   itemr4   r4   r5   r   %  s    zDLlama4ForConditionalGeneration.get_image_feature.<locals>.<listcomp>rF   )r   r   
ValueErrorrC   concatr   next
parametersr   typer   rM   rN   r   )r/   r  r   image_featuresvision_flatprojected_vision_flatr4   r4   r5   get_image_feature  s   

z0Llama4ForConditionalGeneration.get_image_featuremodule_namec                 C   s   t | j|S )z5Skip vision model and multi_modal_projector for LoRA.)rA   lora_patternmatch)r/   r  r4   r4   r5   should_apply_lora1  s   z0Llama4ForConditionalGeneration.should_apply_lora	positionsforward_batchkwargsc                 K   s.   | j r| jnd }t||| jtj|i|d}|S )N)r	  r  r   data_embedding_funcsr  )r   r  r   r   r   IMAGE)r/   r	  r  r  r  image_embedding_funchsr4   r4   r5   r;   5  s   	
z&Llama4ForConditionalGeneration.forwardnameloaded_weightc                    s   dt jdtf fdd}|d}d|v sd|v r5|d d	kr5tr' jjj}n jjj}|||}||fS d
|v s=d|v rU|d d	krUtrK jjj	}n jjj
}|||}||fS )Nwn_headsc                    s@    j jj| } j jj}| ||| d d|dd||S )NrG   rH   )r   re   head_dimrt   rM   	transposer   )r&  r'  attn_inattn_outr/   r4   r5   rO   R  s   
zLLlama4ForConditionalGeneration.permute_qk_weight_for_rotary.<locals>.permute.wkr   rF   weightwqr   )rC   rD   r@   split_is_cpur   re   original_total_num_kv_headsnum_key_value_headsoriginal_num_attention_headsru   )r/   r$  r%  rO   modulesr   r4   r,  r5   permute_qk_weight_for_rotaryL  s   





z;Llama4ForConditionalGeneration.permute_qk_weight_for_rotaryweightsc           
   	   C   s  g d}t |  }t| jdr| jjjn| jj}tjddd|d}t }|D ]O\}}| 	|r1q'| 
|}d|v rA|dd	}n| ||\}}| ||rU|| q'| |||||r_q'| ||||||rjq'|| | ||| q'| | }	|	rtd
|	  d S d S )N)).self_attn.qkv_projz.self_attn.q_projrh   )r9  z.self_attn.k_projri   )r9  z.self_attn.v_projv).shared_expert.gate_up_projz.shared_expert.gate_projr   )r;  z.shared_expert.up_projrH   ).feed_forward.gate_up_projz.feed_forward.gate_projr   )r<  z.feed_forward.up_projrH   r   r   	down_projr   )ckpt_gate_proj_nameckpt_down_proj_nameckpt_up_proj_namenum_expertsvisionz.self_attn.o_projz.self_attn.projz2Some weights are not initialized from checkpoints )dictnamed_parametersr   re   r   num_local_expertsr   make_expert_params_mappingset_should_skip_weight_transform_weight_namereplacer7  _handle_scale_remappingadd_handle_stacked_params_handle_expert_weights_handle_default_weightr  r   r   )
r/   r8  stacked_params_mappingparams_dictrA  expert_params_mappingloaded_paramsr$  r%  unloaded_paramsr4   r4   r5   load_weightsn  s\   





z+Llama4ForConditionalGeneration.load_weightsc                 C   s   | j  od|v pd|v S )z,Check if we should skip loading this weight.rB  r   )r   r/   r$  r4   r4   r5   rH    s   z2Llama4ForConditionalGeneration._should_skip_weightc                 C   s(   | dsd|vrd|vrd| S |S )z@Transform weight name by adding language_model prefix if needed.zlanguage_model.rB  r   )
startswithrV  r4   r4   r5   rI    s   
z5Llama4ForConditionalGeneration._transform_weight_namerQ  c                 C   s&   d|v rd|vrt ||}||kS dS )z:Handle scale parameter remapping. Returns True if handled.r   expertF)r   )r/   r$  rQ  remapped_namer4   r4   r5   rK    s   
z6Llama4ForConditionalGeneration._handle_scale_remappingrP  rS  c                 C   sN   |D ]"\}}}||v r$| ||}	||	 ||	 }
|
|
||  dS qdS )z:Handle stacked parameter loading. Returns True if handled.TF)rJ  rL  weight_loader)r/   r$  r%  rP  rQ  rS  
param_namer   shard_idtransformed_nameparamr4   r4   r5   rM    s   	
z5Llama4ForConditionalGeneration._handle_stacked_paramsrR  rA  c                 C   sZ   d|vrdS d|vrd|vr|  |||||S d|v r$| |||||S | |||||S )a	  Handle expert weight loading for MoE (Mixture of Experts) layers.

        Args:
            name: Parameter name from the checkpoint
            loaded_weight: The weight tensor to be loaded
            expert_params_mapping: Mapping of parameter names to expert configurations
            params_dict: Dictionary of model parameters
            num_experts: Total number of experts in the MoE layer

        Returns:
            bool: True if the parameter was handled (is an expert parameter), False otherwise
        z.expertsFzexperts.gate_up_projzexperts.down_projr   )_handle_other_expert_params_handle_expert_scale_params_handle_expert_weight_params)r/   r$  r%  rR  rQ  rA  rS  r4   r4   r5   rN    s   


z5Llama4ForConditionalGeneration._handle_expert_weightsc                 C   sV   |D ]&\}}}}	||v r(| ||}
||
 }|j||||	|d ||
  dS qdS )a  Handle expert parameters that are not gate_up_proj or down_proj weights.

        Args:
            name: Parameter name from the checkpoint
            loaded_weight: The weight tensor to be loaded
            expert_params_mapping: List of tuples mapping checkpoint names to model parameters
            params_dict: Dictionary of model parameters
            loaded_params: Set of loaded parameter names

        Returns:
            bool: True if parameter was found and handled, False otherwise
        r\  	expert_idTF)rJ  rZ  rL  )r/   r$  r%  rR  rQ  rS  r[  r   rc  r\  r]  r^  r4   r4   r5   r_    s   

z:Llama4ForConditionalGeneration._handle_other_expert_paramsF	is_weightc                 C   sZ   |rdnd}d|v r| dd| }d}ddg}n| d	d
| }d}dg}|||fS )a#  Transform expert parameter name and get shard information.

        Args:
            name: The original parameter name
            is_weight: Whether this is a weight parameter (adds _weight suffix)

        Returns:
            Tuple of (transformed_name, shard_id, shard_id_list)
        _weightr   .gate_up_projz.experts.gate_up_projz.experts.w13w13w1w3z.experts.down_projz.experts.w2w2)rJ  )r/   r$  rd  suffixr]  r\  shard_id_listr4   r4   r5   _transform_expert_name   s   



z5Llama4ForConditionalGeneration._transform_expert_namec                 C   s|   ddl }|d|}| |\}}	}	||vrdS || }
|r+t|d}||
j|< nt|D ]}||
j|< q/|| dS )a  Handle quantization scale parameters for expert weights.

        Args:
            name: Parameter name containing scale information
            loaded_weight: Scale tensor to be loaded
            params_dict: Dictionary of model parameters
            num_experts: Total number of experts for broadcast operations
            loaded_params: Set of loaded parameter names

        Returns:
            bool: True (always handles scale parameters)
        r   Nzexperts\.(\d+)\.TrH   )researchrm  r@   groupdatar   rL  )r/   r$  r%  rQ  rA  rS  rn  expert_matchr]  r:   r^  rc  r4   r4   r5   r`  =  s   
z:Llama4ForConditionalGeneration._handle_expert_scale_paramsc              	   C   s   | j |dd\}}}d|v r|jddd}	n|g}	t|gt| |	|D ]A\}
}}|
|vr.q$||
 }|j}||
 | dkrRt|D ]}|||j|
||d qDq$t|D ]}|||| j|
||d qVq$dS )a  Handle actual weight tensors for expert layers (gate_up_proj and down_proj).

        Args:
            name: Parameter name (should contain gate_up_proj or down_proj)
            loaded_weight: Weight tensor(s) to be loaded
            params_dict: Dictionary of model parameters
            num_experts: Total number of experts for tensor distribution
            loaded_params: Set of loaded parameter names

        Returns:
            bool: True (always handles weight parameters)
        T)rd  rf  rG   rF   r   rb  )	rm  chunkziplenrZ  rL  r   r   T)r/   r$  r%  rQ  rA  rS  r]  r:   rl  loaded_weight_listr[  weight_chunkr\  r^  rZ  rc  r4   r4   r5   ra  m  sD   

	z;Llama4ForConditionalGeneration._handle_expert_weight_paramsc                 C   s8   | dr||vrdS || }t|dt}||| dS )zHandle default weight loading.z.biasNrZ  )endswithr   r   )r/   r$  r%  rQ  r^  rZ  r4   r4   r5   rO    s
   z5Llama4ForConditionalGeneration._handle_default_weight	layer_idsc                 C   s    t | jdr| j| d S d S )Nset_eagle3_layers_to_capture)r   r   r{  )r/   rz  r4   r4   r5   r{    s   z;Llama4ForConditionalGeneration.set_eagle3_layers_to_capturec                 C   sB   | j  }t| j dr| j  S t| j dr|| j jjfS |d fS )Nget_embed_and_headlm_head)r   	get_embedr   r|  r}  r/  r/   embedr4   r4   r5   r|    s   

z1Llama4ForConditionalGeneration.get_embed_and_headc                 C   s&   t | jdr| j||S | j|S )Nset_embed_and_head)r   r   r  	set_embed)r/   r  headr4   r4   r5   r    s   z1Llama4ForConditionalGeneration.set_embed_and_headc                 C   s
   | j  S r8   )r   r~  r,  r4   r4   r5   r~    s   
z(Llama4ForConditionalGeneration.get_embedc                 C   s   | j |S r8   )r   r  r  r4   r4   r5   r    s   z(Llama4ForConditionalGeneration.set_embedc                 C   s   |dkr| j j| j j| j j| j jd   fS |dkr&| j j| j j | j jfS |dkr4| j j| j jd fS |dkrI| j | }| }|| j jfS t	 )Nr   rG   o_projr   r=  )
re   rt   r(  ru   r4  r!   r   
get_layersget_intermediate_sizeNotImplementedError)r/   r  r   decoder_layerr!   r4   r4   r5   get_hidden_dim  s&   
z-Llama4ForConditionalGeneration.get_hidden_dimr   )Fr8   )4r=   r>   r?   packed_modules_mappingrn  compiler  r   r   r   rB   r*   rA   r   r   r   r@   r   r  r   rC   rD   r  r  r   objectr;   r   r7  r   r   rU  rH  rI  rC  rK  listrG  rM  rN  r_  rm  r`  ra  rO  r{  r|  r  r~  r  r  rE   r4   r4   r2   r5   r     s   <"


$"D


&
 

0
@
r   )Bjsonr   loggingrK   r   rn  collections.abcr   typingr   r   r   r   rC   r   transformersr   r	   *transformers.models.llama4.modeling_llama4r
   r   "sglang.srt.layers.attention.visionr   sglang.srt.layers.linearr   r   r   "sglang.srt.layers.logits_processorr   &sglang.srt.layers.moe.fused_moe_tritonr   sglang.srt.layers.quantizationr   sglang.srt.managers.mm_utilsr   r   "sglang.srt.managers.schedule_batchr   r   r   ,sglang.srt.model_executor.forward_batch_infor   sglang.srt.server_argsr   sglang.srt.utilsr   r2  $sglang.srt.model_loader.weight_utilsr   r   r   	getLoggerr=   r   Moduler   r[   r\   rm   rn   r   r   r   r   r   
EntryClassr4   r4   r4   r5   <module>   sP    
*=.#Y    S