o
    پiQ                     @   s~   d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	m
Z
 G dd deZz	ede W d	S  ey>   eejd< Y d	S w )
a%  LFM2-MoE (Liquid Foundation Model 2 - Mixture of Experts) configuration

Note: HF transformers has Lfm2MoeConfig in v5.0.0rc2 (unreleased).
Once released, we could inherit from it like Lfm2Config does with HFLfm2Config.
For now, we define a standalone config to support the model immediately.
    )ListOptional)CONFIG_MAPPING)PretrainedConfig)Mamba2CacheParamsMamba2StateShapec                5       s$  e Zd ZdZdZdgZ									
																	d:dedededededededededed ed!ed"ed#ed$ed%e	e
 d&ed'ed(ed)ed*ed+ed,ed-ed.e	ee  f2 fd/d0Zed1ee fd2d3Zed1ee fd4d5Zed1efd6d7Zed1e	e fd8d9Z  ZS );Lfm2MoeConfiga  
    Configuration for LFM2-MoE models (e.g., LiquidAI/LFM2-8B-A1B).

    LFM2-MoE is a hybrid architecture with:
    - Attention layers and ShortConv layers (like dense LFM2)
    - MoE (Mixture of Experts) FFN layers with sigmoid routing

    Key MoE specifics:
    - First `num_dense_layers` use dense MLP, rest use MoE
    - Sigmoid routing (not softmax) with expert_bias for load balancing
    - expert_bias is fp32 for numerical stability
    lfm2_moepast_key_values                     {Gz?h㈵>Tr         NF            ?
vocab_sizehidden_sizeintermediate_sizemoe_intermediate_sizenum_hidden_layersnum_attention_headsnum_key_value_headsmax_position_embeddingsinitializer_rangenorm_eps	use_cachepad_token_idbos_token_ideos_token_idtie_word_embeddingsrope_parameters	conv_biasconv_L_cachenum_dense_layersnum_expertsnum_experts_per_tokuse_expert_biasrouted_scaling_factornorm_topk_problayer_typesc                    s   || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|| _|| _|| _|| _|| _|| _|| _|| _|d urVt||krVtdt| d| d|d|}t jd||||d| d S )Nzlayer_types length (z ) must match num_hidden_layers ()tie_embedding)r$   r%   r&   r'    )r   r   r   r   r   r   r   r    r!   r"   r#   r)   r*   r+   r,   r-   r.   r/   r0   r1   r(   len
ValueErrorpopsuper__init__)selfr   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   r(   r)   r*   r+   r,   r-   r.   r/   r0   r1   kwargs	__class__r4   O/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/configs/lfm2_moe.pyr9   -   sH   
zLfm2MoeConfig.__init__returnc                 C   "   | j du rg S dd t| j D S )z0Return indices of attention layers for KV cache.Nc                 S   s   g | ]
\}}|d kr|qS )full_attentionr4   .0iltr4   r4   r>   
<listcomp>   s    z:Lfm2MoeConfig.full_attention_layer_ids.<locals>.<listcomp>r1   	enumerater:   r4   r4   r>   full_attention_layer_ids|   s   
z&Lfm2MoeConfig.full_attention_layer_idsc                 C   r@   )z3Return indices of conv layers for conv state cache.Nc                 S   s   g | ]
\}}|d v r|qS ))conv
short_convr4   rB   r4   r4   r>   rF      s    z2Lfm2MoeConfig.linear_layer_ids.<locals>.<listcomp>rG   rI   r4   r4   r>   linear_layer_ids   s
   
zLfm2MoeConfig.linear_layer_idsc                 C   s   dS )z@Return chunk size for Mamba2 backend. LFM2 doesn't use chunking.r   r4   rI   r4   r4   r>   mamba_chunk_size   s   zLfm2MoeConfig.mamba_chunk_sizec              	   C   st   ddl m} | j}|sdS | j}t| j}z| }W n ttfy'   d}Y nw tj	||d||d|d}t
||dS )z
        Get cache params for HybridReqToTokenPool initialization.

        LFM2-MoE uses ShortConv layers with a small fixed-size cache.
        r   )get_attention_tp_sizeNr   )tp_world_sizer   n_groups	num_headshead_dim
state_sizeconv_kernel)shapelayers)sglang.srt.layers.dp_attentionrO   rM   r   intr*   AssertionErrorRuntimeErrorr   creater   )r:   rO   conv_layer_idsr   rU   tp_sizerV   r4   r4   r>   mamba2_cache_params   s0   

z!Lfm2MoeConfig.mamba2_cache_params)r   r   r   r   r   r   r   r   r   r   Tr   r   r   TNFr   r   r   r   Tr   TN)__name__
__module____qualname____doc__
model_typekeys_to_ignore_at_inferencerY   floatboolr   dictr   strr9   propertyrJ   rM   rN   r   r_   __classcell__r4   r4   r<   r>   r      s    	

Or   r	   N)rc   typingr   r   transformersr    transformers.configuration_utilsr   sglang.srt.configs.mamba_utilsr   r   r   register	Exception_extra_contentr4   r4   r4   r>   <module>   s    !