o
    پi                     @   s  d Z ddlZddlZddlmZ ddlmZ ddlmZm	Z	m
Z
mZmZmZ ddlZddlmZ ddlmZ ddlmZmZ dd	lmZmZmZmZmZmZmZmZ dd
lmZ ddl m!Z! ddl"m#Z#m$Z$ ddl%m&Z&m'Z'm(Z( ddl)m*Z* ddl+m,Z,m-Z-m.Z. ddl/m0Z0 ddl1m2Z2 ddl3m4Z4 ddl5m6Z6 ddl7m8Z8 ddl9m:Z: ddl;m<Z< ddl=m>Z> ddl?m@Z@ ddlAmBZB ddlCmDZDmEZE ddlFmGZGmHZH ddlImJZJmKZK ddlLmMZM ddlNmOZOmPZP ddlQmRZR dd lSmTZTmUZUmVZVmWZWmXZX dd!lYmZZZ eV Z[eW Z\e[rdd"l]m^Z^ G d#d$ d$eZ_e`eaZbd%d& ZcG d'd( d(ejdZeeZd)d*d+efd)ejgd,ejgfd-d.ZhG d/d0 d0ejdZiG d1d2 d2ejdZjG d3d4 d4ejdZkG d5d6 d6ejdZld7eeemejgf  fd8d9Znd:d; ZoG d<d= d=ZpelZqdS )>z@Inference-only GptOss model compatible with HuggingFace weights.    N)Iterable)partial)AnyDictListOptionalTupleUnion)nn)PretrainedConfig)get_forward_contextis_in_piecewise_cuda_graph)get_moe_expert_parallel_rank"get_moe_expert_parallel_world_sizeget_moe_tensor_parallel_rank"get_moe_tensor_parallel_world_sizeget_pp_groupget_tensor_model_parallel_rank$get_tensor_model_parallel_world_size tensor_model_parallel_all_reduce)'get_global_expert_distribution_recorder)ModelConfigForExpertLocation)LayerCommunicatorLayerScatterModes)get_attention_tp_rankget_attention_tp_sizeis_dp_attention_enabled)RMSNorm)QKVParallelLinearReplicatedLinearRowParallelLinear)LogitsProcessor)get_moe_a2a_backend)get_moe_impl_class)FusedMoE)TopK)%filter_moe_weight_param_global_expert)QuantizationConfig)dequant_mxfp4)RadixAttention)get_rope)PPMissingLayerget_layer_id)ParallelLMHeadVocabParallelEmbedding)ForwardBatchPPProxyTensors)default_weight_loader)create_fused_set_kv_buffer_argenable_fused_set_kv_buffer)get_global_server_args)	LazyValue
add_prefixis_cudais_npumake_layers)register_custom_op)FusedSetKVBufferArgc                       s    e Zd ZdZ fddZ  ZS )GptOssConfiggpt_ossc                    s   t  jdi | d S )N )super__init__)selfkwargs	__class__r>   M/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/models/gpt_oss.pyr@   \   s   zGptOssConfig.__init__)__name__
__module____qualname__
model_typer@   __classcell__r>   r>   rC   rE   r<   Y   s    r<   c                 C   s
   | j d S N   )sliding_window)configr>   r>   rE   !get_attention_sliding_window_sizee      
rO   c                	       s   e Zd Z		ddededee def fddZ			dd
e	j
dee dede	j
fddZdd Z		dd
e	j
dede	j
fddZ  ZS )GptOssSparseMoeBlockN layer_idrN   quant_configprefixc                    s   t    t | _|| _|j| _t|dd| _|j	| _
t|jd|d| _|j| _t|}i }|jdkrC|d ur;| nd }d|dki}|d|jt j |j||j|j|| j| j| j
dtd|d	|| _t|j|jdd td
||jd| _d S )Nhidden_act_alphagZd;?T)top_krenormalizerS   r$   use_weight_loader_fusedmxfp4experts)num_expertsrW   rS   hidden_sizeintermediate_sizerT   
activationgemm1_alphagemm1_clamp_limit	with_biasrU   gate)biasrT   rU   params_dtyper>   )r?   r@   r   tp_sizerS   
hidden_actr_   getattrr`   swiglu_limitra   r%   num_experts_per_toktopkrW   r#   rF   get_namenum_local_expertsr4   ep_num_redundant_expertsr]   r^   r6   r[   r   torch_dtyperouter)rA   rS   rN   rT   rU   experts_typeextra_kwargsquant_config_namerC   r>   rE   r@   j   s\   

zGptOssSparseMoeBlock.__init__Fhidden_statesforward_batchshould_allreduce_fusionreturnc                 C   s   t   s| ||S td)Nz)forward_deepep branch not implemented yet)r"   	is_deepepforward_normal	Exception)rA   rt   ru   rv   r>   r>   rE   forward   s   
zGptOssSparseMoeBlock.forwardc                    s    fdd j  D S )Nc                    s.   g | ]\}}|d vrt || jjr|jqS ))correction_bias)r&   r[   rm   data).0namexrA   r>   rE   
<listcomp>   s    
z8GptOssSparseMoeBlock.get_moe_weights.<locals>.<listcomp>)r[   named_parametersr   r>   r   rE   get_moe_weights   s   
z$GptOssSparseMoeBlock.get_moe_weightsc           
      C   sj   |j \}}t rt| j|}n| |\}}| ||}| ||}| jdkr-|s-t|}|	||}	|	S rK   )
shaper   moe_implrS   rp   rk   r[   rf   r   view)
rA   rt   rv   
num_tokens
hidden_dimfinal_hidden_statesrouter_logits_topk_outputansr>   r>   rE   ry      s   
z#GptOssSparseMoeBlock.forward_normalNrR   )NF)F)rF   rG   rH   intr<   r   r'   strr@   torchTensorr/   boolr{   r   ry   rJ   r>   r>   rC   rE   rQ   i   s@    =
rQ   rt   )	out_shaperS   rw   c                 C   s:   t  }|j|  }||\}}|||}|||}|S N)r   moe_fusionsrp   rk   r[   )rS   rt   forward_context
moe_fusionr   r   r   r   r>   r>   rE   r      s   
r   c                !       s   e Zd Zdddddddddddejfd	ed
ededededeee	e
f  dedee dededee de	dede	dejddf  fddZdejdejdefddZd d! Zdejdejdedejfd"d#Z  ZS )$GptOssAttentionr   '  N    gư>FrR   r]   	num_headsnum_kv_headsrS   
rope_thetarope_scalingmax_position_embeddingshead_dimrms_norm_epsattention_biasrT   rU   sliding_window_size
layer_typere   rw   c                    s  t    || _|| _t }t }|| _| j| dksJ | j| | _|| _| j|kr5| j| dks4J n	|| j dks>J t	d| j| | _
|pM|| j | _| j| j | _| j
| j | _| jd | _|| _|| _t | _t|| j| j| j|
||||td|d
| _t j}|dkrtjntj}tjtj| j|ddd	| _t| j| j ||
|||d|td
|d	| _ t!| j| j|||d| _"|dv sJ |dk}t#| j| j| j| j
|td||r|ndd| _$|| _%d S )Nr   rL   g      qkv_proj)rd   re   rT   tp_rankrf   rU   
trtllm_mha)dtypeF)requires_grado_proj)rd   rT   r   rf   reduce_resultsre   rU   )
rotary_dimmax_positionbaser   >   full_attentionsliding_attentionr   attnr   )r   rS   rU   r   )&r?   r@   r]   r   r   r   total_num_headsr   total_num_kv_headsmaxr   r   q_sizekv_sizescalingr   r   r   r   r   r6   r   r4   attention_backendr   float32bfloat16r
   	Parameteremptysinksr    r   r*   
rotary_embr)   r   rS   )rA   r]   r   r   rS   r   r   r   r   r   r   rT   rU   r   r   re   attn_tp_rankattn_tp_sizeattn_backendsinks_dtypeuse_sliding_windowrC   r>   rE   r@      s   




	zGptOssAttention.__init__	positionsrt   ru   c                 C   s   |j d dkr||d fS | |\}}|j| j| j| jgdd\}}}i }	ts7dt|r4t|| j|dnd i}	| j	|||fi |	\}}||||f}
d ||
fS )Nr   r   dimfused_set_kv_buffer_arg)valuelayerru   )
r   r   splitr   r   _is_npur3   r2   r   r   )rA   r   rt   ru   qkvr   qkv
extra_argsinner_stater>   r>   rE   forward_prepare<  s$   
 
zGptOssAttention.forward_preparec                 C   sB   |\}}}|d u r|S | j || jt| d}| |\}}|S )N)r   save_kv_cache)r   r   r3   r   )rA   intermediate_statert   ru   r   attn_outputoutputr   r>   r>   rE   forward_coreX  s   
zGptOssAttention.forward_corec                 C   s   | j |||d}| |S )Nr   rt   ru   )r   r   )rA   r   rt   ru   sr>   r>   rE   r{   d  s   
zGptOssAttention.forward)rF   rG   rH   r   r   r   floatr   r   r   r   r   r'   r   r@   r   r/   r   r   r{   rJ   r>   r>   rC   rE   r      s~    	
d
r   c                       sz   e Zd Z			ddededee dededB ddf fd	d
Zde	j
de	j
dedee	j
 dee	j
e	j
f f
ddZ  ZS )GptOssDecoderLayerNrR   rN   rS   rT   rU   r   rw   c                    sv  t    || _|j| _t|dd}t|dd }t|dd}t|d|j|j }	|j}
|j}|d u r9t| j| _	n|| _	t
| j|j|j|||||	|
|td|| j	|j| |jd| _|| _t | _t | _d	| _d
| _d	}d	}tj||j| j||d| _| jrt| j||td|d| _ntdt|j|jd| _t|j|jd| _ t!| j| j| j | jp| j| jjd kd| _"d S )Nr   r   r   r   r   r   	self_attn)r]   r   r   rS   r   r   r   r   r   r   rU   r   r   re   TF)rS   
num_layersis_layer_sparseis_previous_layer_sparseis_next_layer_sparsemlprS   rN   rT   rU   z]Dense MLP is not implemented for GptOssDecoderLayer. Please use GptOssSparseMoeBlock instead.epsrL   )layer_scatter_modesinput_layernormpost_attention_layernormis_last_layer)#r?   r@   rN   r]   rh   num_attention_headsr   r   rO   r   r   num_key_value_headsr6   layer_typesro   r   rS   r   r   r   r   r   is_nextnr   init_newnum_hidden_layersr   rQ   r   NotImplementedErrorr   r   r   r   layer_communicator)rA   rN   rS   rT   rU   r   r   r   r   r   r   r   r   r   rC   r>   rE   r@   s  s   

zGptOssDecoderLayer.__init__r   rt   ru   residualc                 C   s   | j |||\}}|jd dkr| j|||d}| j |||\}}| j |}| |||}|r5d|_|sA| j |||\}}||fS )Nr   r   T)	r   prepare_attnr   r   prepare_mlp)should_fuse_mlp_allreduce_with_next_layerr   _sglang_needs_allreduce_fusionpostprocess_layer)rA   r   rt   ru   r   rv   r>   r>   rE   r{     s0   zGptOssDecoderLayer.forward)NrR   N)rF   rG   rH   r<   r   r   r'   r   r@   r   r   r/   r   r{   rJ   r>   r>   rC   rE   r   r  s8    Wr   c                       s   e Zd Zddefdedee dedee	j
 ddf
 fdd	Z		dd
ejdejdedejdee deejef fddZ  ZS )GptOssModelNrR   rN   rT   rU   decoder_layer_typerw   c                    s   t     j| _ j| _t | _trd _| jj	r*t
 j jt td|d| _nt | _p1tt j fdd| jj| jjtd|d\| _| _| _| jjr]t j jd| _ntd	d
| _g | _d S )Nnpu_swiglu_oaiembed_tokens)use_attn_tp_grouprU   c                    s   |  |dS )Nr   r>   )idxrU   rN   r   rT   r>   rE   <lambda>  s    z&GptOssModel.__init__.<locals>.<lambda>layers)pp_rankpp_sizerU   r   T)return_tuple)r?   r@   pad_token_idpadding_idx
vocab_sizer   pp_groupr   rg   is_first_rankr.   r]   r   r6   r   r+   r   r9   r   rank_in_group
world_sizer  start_layer	end_layeris_last_rankr   r   normlayers_to_capture)rA   rN   rT   rU   r   rC   r   rE   r@     s4   


zGptOssModel.__init__	input_idsr   ru   input_embedspp_proxy_tensorsc              	   C   s  | j jr|d u r| |}n|}d }n|d usJ |d }|d }g }t| j| jD ]2}	t |	" |	| jv r?|	||  | j
|	 }
|
||||\}}W d    n1 sWw   Y  q*| j jsht||dS |jd dkr|d u ry| |}n| ||\}}t|dkr|S ||fS )Nrt   r   )rt   r   r   )r	  r
  r   ranger  r  r   with_current_layerr  appendr  r  r0   r   r  len)rA   r  r   ru   r  r  rt   r   aux_hidden_statesir   r   r>   r>   rE   r{   "  s@   


zGptOssModel.forwardNN)rF   rG   rH   r   r   r   r'   r   typer
   Moduler@   r   r   r/   r0   r	   r{   rJ   r>   r>   rC   rE   r     s<    3r   c                       s0  e Zd ZdZ		d/dedee deddf fdd	Ze	d
d Z
e 		d0dejdejdedejdee dejfddZe	dd Ze	dd Zdd Z		d1deeeejf  dedefddZdd Zd d! Zg fdedefd"d#Zd$d% Zd&d' Zd2d(eee  fd)d*Z e!d+d, Z"d-d. Z#  Z$S )3GptOssForCausalLMFNrR   rN   rT   rU   rw   c                    sz   t    t  _| _| _t||td|d _t	|j
|jtd|t jd _t| _d _t fdd _d S )Nmodel)rU   lm_head)rU   r   Fc                      s    fddt  j jD S )Nc                    s4   i | ]}t  jj| jtr| jj| j qS r>   )
isinstancer  r  r   rQ   r   )r~   rS   r   r>   rE   
<dictcomp>l  s    z@GptOssForCausalLM.__init__.<locals>.<lambda>.<locals>.<dictcomp>)r  r  r  r>   r   r>   rE   r  l  s   
 z,GptOssForCausalLM.__init__.<locals>.<lambda>)r?   r@   r   r	  rN   rT   r   r6   r  r-   r  r]   r4   enable_dp_lm_headr   r!   logits_processorcapture_aux_hidden_statesr5    _routed_experts_weights_of_layer)rA   rN   rT   rU   rC   r   rE   r@   T  s$   



zGptOssForCausalLM.__init__c                 C      | j jS r   )r&  r   r   r>   r>   rE   routed_experts_weights_of_layers     z1GptOssForCausalLM.routed_experts_weights_of_layerr  r   ru   r  r  c                 C   sF   | j |||||d}d }| jr|\}}| jjr!| ||| j||S |S )N)r  )r  r%  r	  r  r$  r   )rA   r  r   ru   r  r  rt   r  r>   r>   rE   r{   w  s&   	zGptOssForCausalLM.forwardc                 C   r'  r   )r  r  r   r>   r>   rE   r    r)  zGptOssForCausalLM.start_layerc                 C   r'  r   )r  r  r   r>   r>   rE   r    r)  zGptOssForCausalLM.end_layerc                 C   s  i }d|d< d|d< d|d< t | jjD ]}d| d|d	| d
< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d< d| d|d	| d < d| d!|d	| d"< d| d#|d	| d#< d| d$|d	| d%< d| d&|d	| d'< d| d(|d	| d)< q|S )*z<Generate default weight name mapping for GptOss safetensors.zmodel.embed_tokens.weightzembedding.weightzlm_head.weightzunembedding.weightzmodel.norm.weightz
norm.scalezmodel.layers.z.self_attn.q_proj.weightblock.z.attn.q_proj.weightz.self_attn.q_proj.biasz.attn.q_proj.biasz.self_attn.k_proj.weightz.attn.k_proj.weightz.self_attn.k_proj.biasz.attn.k_proj.biasz.self_attn.v_proj.weightz.attn.v_proj.weightz.self_attn.v_proj.biasz.attn.v_proj.biasz.self_attn.o_proj.weightz.attn.out.weightz.self_attn.o_proj.biasz.attn.out.biasz.self_attn.sinksz.attn.sinksz.input_layernorm.weightz.attn.norm.scalez.mlp.router.weightz.mlp.gate.weightz.mlp.router.biasz.mlp.gate.biasz .post_attention_layernorm.weightz.mlp.norm.scalez.mlp.experts.gate_up_projz.mlp.experts.gate_up_proj_biasz.mlp.gate_up_proj_biasz.mlp.experts.mlp2_weightz.mlp.down_projz.mlp.experts.mlp2_biasz.mlp.down_proj_bias)r  rN   r   )rA   weight_mappingrS   r>   r>   rE   _get_default_weight_mapping  sP   
















z-GptOssForCausalLM._get_default_weight_mappingweightsr   weight_name_mappingc                 C   sH   | j d ur
| j  nd }|dkr| j|||d d S | j|||d d S )NrZ   )r   r.  )rT   rl   _load_normal_weights_load_weights_mxfp4)rA   r-  r   r.  rs   r>   r>   rE   load_weights  s   

zGptOssForCausalLM.load_weightsc           	      C   st   g }g }|D ]#\}}d|v r"| j d ur"| j  dkr"|||f q|||f q| |}| j||||d d S )Nz.expertsrZ   )r   r.  other_loaded_param_names)rT   rl   r  _load_mxfp4_experts_weightsr/  )	rA   r-  r   r.  mxfp4_weightsnormal_weightsr   weightmxfp4_loaded_paramsr>   r>   rE   r0    s   


z%GptOssForCausalLM._load_weights_mxfp4c                 C   st  t |  }t }d}t }t }t }t }| jj}	|	| dks+J d|	d||	| }
t	
|
| }|| }| jj| dksDJ | jj}| jj| }|| }t|d | |	}|| }|d | }|D ]O\}}| }d|v r|dd}||d|	 d	 }|||d| d| d
f }|| }t|dt}||||d d d || qgd|v r|dd}||d	|	d  }|||d
|d |d f }|| }t|dt}||||d d d || qgd|v r|dd}|||d| d| d
f }|| }t|dt}||||d d d || qgd|v rP|dd}|||d
|| || f }|| }t|dt}||||d d d || qgd|v r|dd}|||d| d| f }|| }t|dt}||||d d d || qgd|v r|||d
f }|dkrt|}|dd}|| }t|dt}||||d d d || qg|S )N    r   zintermediate_size=z" must be divisible by mxfp4_block=rL   gate_up_proj_blocks
w13_weight   r   .weight_loader)weight_nameshard_id	expert_iddown_proj_blocks	w2_weightgate_up_proj_scalesw13_weight_scaledown_proj_scalesw2_weight_scalegate_up_proj_biasw13_weight_biasdown_proj_biasw2_weight_bias)dictr   setr   r   r   r   rN   r^   mathceilrm   mincudareplacer   
contiguousrh   r1   addr   
zeros_like)rA   r-  params_dictloaded_paramsmxfp4_blockmoe_tp_rankmoe_tp_sizemoe_ep_rankmoe_ep_sizer^   intermediate_size_block per_rank_intermediate_size_blockper_rank_intermediate_sizemoe_num_global_expertsmoe_num_local_expertsmoe_tp_rank_startmoe_tp_rank_endmoe_ep_rank_startmoe_ep_rank_endr   r6  new_namenarrow_weightparamr<  r>   r>   rE   r3    s  








z-GptOssForCausalLM._load_mxfp4_experts_weightsc                 C   s  t  }|rtd d S t| j|}t|dd d}g }|D ]\}}d|v rj|j| jj| jj | jj	| jj | jj	| jj gdd\}	}
}|
|dd |	f |
|dd	 |
f |
|dd
 |f qd|v r|j| jj| jj | jj	| jj | jj	| jj gdd\}}}|
|dd |f |
|dd |f |
|dd |f q|
||f q|}|d u r|  }n|  }|| |}g d}tjddddd}t|  }|D ]\}}t|}|r||v r|| }t|}|d urt| jdr|| jjk s|| jjkrqd|v rq|D ];\}}}||vr,q d|v r3q |||}|drF||vrFq ||vrMq || }|j}||||  n|D ]E}|\}}}||vrlq^|||}||vryq^|| }|j}d|vr|dd}d|v rt dkr| }|||||d  nM|dr||vrq||vrq|| v r|| }d|v rt |   }|j!"||||     qt#|dt$}||| qt%d | d! qd S )"NzKLoading weights for nextn is currently not supported in GptOssForCausalLM. c                 S   s   | d S )Nr   r>   )r   r>   r>   rE   r    s    z8GptOssForCausalLM._load_normal_weights.<locals>.<lambda>)keyz
qkv.weightr   r   zq_proj.weightzk_proj.weightzv_proj.weightzqkv.biaszq_proj.biaszk_proj.biaszv_proj.bias))r   q_projr   )r   k_projr   )r   v_projr   gate_up_proj	down_projrF  rH  )ckpt_gate_up_proj_nameckpt_down_proj_nameckpt_gate_up_proj_bias_nameckpt_down_proj_bias_namer  zrotary_emb.inv_freqzmlp.expertsz.biasrd   r   rI  )r>  r   r<  z
Parameter z not found in params_dict)&r   loggingwarning_canonicalize_weightsrN   sortedr   r   r   r   r  rP  r,  updater$    make_expert_params_mapping_fusedrJ  r   _WeightCreatormaybe_materializer,   hasattrr  r  r  endswithr<  	transposer   zero_keysr   numelr}   copy_rh   r1   logger)rA   r-  r   r.  r2  r   new_weightsr   prh  ri  rj  q_biask_biasv_biasdefault_mappingstacked_params_mappingexpert_params_mappingrT  loaded_weightrS   
param_namer=  r>  rf  r<  mappingstartr>   r>   rE   r/    s   














z&GptOssForCausalLM._load_normal_weightsc                 C   s   | j jj| jjfS r   )r  r   r6  r   r   r>   r>   rE   get_embed_and_headU  s   z$GptOssForCausalLM.get_embed_and_headc                 C   s8   | j j`| j`|| j j_|| j_tj  tj  d S r   )r  r   r6  r   r   rO  empty_cachesynchronize)rA   embedheadr>   r>   rE   set_embed_and_headX  s   

z$GptOssForCausalLM.set_embed_and_head	layer_idsc                 C   sX   | j jsd S |d u rd| _| jj}d|d |d g| j_d S d| _dd |D | j_d S )NTr;     c                 S   s   g | ]}|d  qS )rL   r>   )r~   valr>   r>   rE   r   l  s    zBGptOssForCausalLM.set_eagle3_layers_to_capture.<locals>.<listcomp>)r	  r  r%  rN   r   r  r  )rA   r  r   r>   r>   rE   set_eagle3_layers_to_capture`  s   z.GptOssForCausalLM.set_eagle3_layers_to_capturec                 C   s   t |j|jd dS )N)r   num_logical_experts
num_groups)r   r   rm   )clsrN   r>   r>   rE   $get_model_config_for_expert_locationn  s
   z6GptOssForCausalLM.get_model_config_for_expert_locationc                 C   s
   t | jS r   )rO   rN   r   r>   r>   rE   rO   v  rP   z3GptOssForCausalLM.get_attention_sliding_window_sizer   r  )FNr   )%rF   rG   rH   fall_back_to_pt_during_loadr<   r   r'   r   r@   propertyr(  r   no_gradr   r/   r0   r{   r  r  r,  r   r   r   rJ  r1  r0  r3  r/  r  r  r   r   r  classmethodr  rO   rJ   r>   r>   rC   rE   r  Q  sz    


G
 0
 !
r  
weights_inc              	   C   s   t |}t| jD ]1}dD ],}d| d| }|| dd }|| dd }|d ur9ttt|||d||< qq	t| S )N)mlp1_weightmlp2_weightr*  z.mlp.z.blocksz.scales)
debug_namew_blocksw_scales)	rJ  r  r   poprx  r   _dequant_mlp_weightlistitems)rN   r  weights_out_dictrS   
name_chunkname_prefixr  r  r>   r>   rE   rt  z  s$   rt  c              
   C   s   t  dkrtd|  d |j}| }| }t||tjd}|dd	 }t  dkrBtd|  d|j
d|j
d	|j
 ||S )
Nr   zDequantize z start)w_blockw_scale	out_dtyperq  r   z end w_blocks.shape=z w_scales.shape=z w_bf16.shape=)r   r  infodevicerO  r(   r   r   r|  rQ  r   to)r  r  r  original_devicew_bf16r>   r>   rE   r    s   

 
r  c                   @   s    e Zd Zdd Zedd ZdS )rx  c                 C   s
   || _ d S r   )_fn)rA   fnr>   r>   rE   r@     rP   z_WeightCreator.__init__c                 C   s    t | tr|  }d | _|S | S r   )r!  rx  r  )objr   r>   r>   rE   ry    s
   
z _WeightCreator.maybe_materializeN)rF   rG   rH   r@   staticmethodry  r>   r>   r>   rE   rx    s    rx  )r__doc__rr  rL  collections.abcr   	functoolsr   typingr   r   r   r   r   r	   r   r
   transformersr   0sglang.srt.compilation.piecewise_context_managerr   r   sglang.srt.distributedr   r   r   r   r   r   r   r   #sglang.srt.eplb.expert_distributionr   sglang.srt.eplb.expert_locationr   sglang.srt.layers.communicatorr   r   sglang.srt.layers.dp_attentionr   r   r   sglang.srt.layers.layernormr   sglang.srt.layers.linearr   r   r    "sglang.srt.layers.logits_processorr!   sglang.srt.layers.moer"   "sglang.srt.layers.moe.ep_moe.layerr#   ,sglang.srt.layers.moe.fused_moe_triton.layerr$   sglang.srt.layers.moe.topkr%   sglang.srt.layers.moe.utilsr&   *sglang.srt.layers.quantization.base_configr'   (sglang.srt.layers.quantization.fp8_utilsr(   !sglang.srt.layers.radix_attentionr)   "sglang.srt.layers.rotary_embeddingr*   sglang.srt.layers.utilsr+   r,   *sglang.srt.layers.vocab_parallel_embeddingr-   r.   ,sglang.srt.model_executor.forward_batch_infor/   r0   $sglang.srt.model_loader.weight_utilsr1   sglang.srt.models.utilsr2   r3   sglang.srt.server_argsr4   sglang.srt.utilsr5   r6   r7   r8   r9   sglang.srt.utils.custom_opr:   _is_cudar   
sgl_kernelr;   r<   	getLoggerrF   r  rO   r  rQ   r   r   r   r   r   r   r  r   rt  r  rx  
EntryClassr>   r>   r>   rE   <module>   sv    (

d	  ^    -