o
    پi                     @   st   d Z ddlmZmZ ddlmZ ddlmZ ddlm	Z	 ddl
mZmZmZ e	eZG dd deZeejd	< d
S )z.LFM2 (Liquid Foundation Model 2) configuration    )ListOptional)CONFIG_MAPPING)
Lfm2Config)logging)Mamba2CacheParamsMamba2StateShapemamba2_state_dtypec                   @   sd   e Zd ZdZedee fddZedee fddZedefddZ	ede
e fd	d
ZdS )r   z
    SGLang configuration for LFM2 models.

    Extends HuggingFace's Lfm2Config with hybrid model properties needed by SGLang.
    LFM2 uses a hybrid architecture mixing full attention and ShortConv layers.
    returnc                 C      dd t | jD S )z0Return indices of attention layers for KV cache.c                 S   s   g | ]
\}}|d kr|qS )full_attention .0iltr   r   K/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/configs/lfm2.py
<listcomp>+   s    z7Lfm2Config.full_attention_layer_ids.<locals>.<listcomp>	enumeratelayer_typesselfr   r   r   full_attention_layer_ids(   s   z#Lfm2Config.full_attention_layer_idsc                 C   r   )z3Return indices of conv layers for conv state cache.c                 S   s   g | ]
\}}|d v r|qS ))conv
short_convr   r   r   r   r   r   0   s    z/Lfm2Config.linear_layer_ids.<locals>.<listcomp>r   r   r   r   r   linear_layer_ids-   s   zLfm2Config.linear_layer_idsc                 C   s   dS )zJReturn chunk size for Mamba2 backend. LFM2 doesn't use chunking, return 1.   r   r   r   r   r   mamba_chunk_size4   s   zLfm2Config.mamba_chunk_sizec              	   C   sz   ddl m} | j}|sdS | j}t| j}z| }W n ttfy'   d}Y nw tj	||d||d|d}t
||t| dS )z
        Get cache params for HybridReqToTokenPool initialization.

        LFM2 uses ShortConv layers with a small fixed-size cache (kernel_size - 1).
        Unlike full Mamba2 models, LFM2 only uses the conv state, not SSM temporal state.
        r   )get_attention_tp_sizeNr   )tp_world_sizeintermediate_sizen_groups	num_headshead_dim
state_sizeconv_kernel)shapelayersdtype)sglang.srt.layers.dp_attentionr   r   hidden_sizeintconv_L_cacheAssertionErrorRuntimeErrorr   creater   r	   )r   r   conv_layer_idsr+   r&   tp_sizer'   r   r   r   mamba2_cache_params9   s2   


zLfm2Config.mamba2_cache_paramsN)__name__
__module____qualname____doc__propertyr   r,   r   r   r   r   r   r3   r   r   r   r   r       s    r   lfm2N)r7   typingr   r   transformersr   r   HFLfm2Configtransformers.utilsr   sglang.srt.configs.mamba_utilsr   r   r	   
get_loggerr4   logger_extra_contentr   r   r   r   <module>   s   
H