o
    پi0                     @   s0   d dl mZ d dlmZmZ G dd deZdS )    )PretrainedConfig)KimiLinearCacheParamsKimiLinearStateShapec                *       sP  e Zd ZdZdgZ												
																											d9dedB dedededB dedB dededededededededB d edB d!edB d"edB d#edB d$edB d%ed&e	dB f( fd'd(Z
ed)d* Zed+d, Zed-efd.d/Zd0efd1d2Zed3d4 Zed5d6 Zed-efd7d8Z  ZS ):KimiLinearConfigkimi_linearpast_key_values     N +      silu{Gz?ư>Tr              @Fsigmoid      ?moe_intermediate_sizemoe_renormalizemoe_router_activation_funcnum_expertsnum_experts_per_tokennum_shared_expertsrouted_scaling_factorfirst_k_dense_replacemoe_layer_frequse_grouped_topknum_expert_group
topk_groupq_lora_rankkv_lora_rankqk_nope_head_dimqk_rope_head_dim
v_head_dimmla_use_nopenum_nextn_predict_layerslinear_attn_configc'           (         sD  || _ || _|| _|d ur|n|| | _|| _|| _|| _|d u r#|}|| _|	| _|
| _	|| _
|| _|| _|| _|| _| | _|!| _|"| _|#| _|$| _| | _| _|| _|| _|| _|| _|| _| jdv sfJ || _|| _|| _|| _|| _|| _ |%| _!|&d ur|&d d usJ |&d d usJ |&| _"t# j$d||||d|' d S )N)softmaxr   
kda_layersfull_attn_layers)pad_token_idbos_token_ideos_token_idtie_word_embeddings )%
model_type
vocab_sizehidden_sizehead_dimintermediate_sizenum_hidden_layersnum_attention_headsnum_key_value_heads
hidden_actinitializer_rangerms_norm_eps	use_cache
rope_thetarope_scalingr    r!   r"   r#   r$   r%   n_routed_expertsr   r   r   r   r   r   r   r   r   r   r   r   r&   r'   super__init__)(selfr0   r1   r2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r+   r,   r-   r<   r=   r.   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   r&   r'   kwargs	__class__r/   R/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/configs/kimi_linear.pyr@      sb   *
zKimiLinearConfig.__init__c                 C   s<   | j d up| jd up| jd up| jd up| jd up| jdu S )NT)r    r!   r"   r#   r$   r%   rA   r/   r/   rE   is_mlan   s   
zKimiLinearConfig.is_mlac                 C   s
   | j d uS )N)r   rF   r/   r/   rE   is_moey   s   
zKimiLinearConfig.is_moereturnc                 C   s8   | j d u pt| j to| j d d uot| j d dk S )Nr)   r   )r'   
isinstancedictlenrF   r/   r/   rE   is_linear_attn}   s   
zKimiLinearConfig.is_linear_attn	layer_idxc                 C   s   | j d uo|d | j d v S )Nr   r)   )r'   )rA   rN   r/   r/   rE   is_kda_layer   s   
zKimiLinearConfig.is_kda_layerc                        fddt  jD S )Nc                    s   g | ]	}  |r|qS r/   rO   .0irF   r/   rE   
<listcomp>       z5KimiLinearConfig.linear_layer_ids.<locals>.<listcomp>ranger5   rF   r/   rF   rE   linear_layer_ids      z!KimiLinearConfig.linear_layer_idsc                    rP   )Nc                    s   g | ]	}  |s|qS r/   rQ   rR   rF   r/   rE   rU      rV   z=KimiLinearConfig.full_attention_layer_ids.<locals>.<listcomp>rW   rF   r/   rF   rE   full_attention_layer_ids   rZ   z)KimiLinearConfig.full_attention_layer_idsc                 C   s@   ddl m} tj| | jd | jd | jd d}t|| jdS )Nr   )get_attention_tp_size	num_headsr3   short_conv_kernel_size)tp_world_sizer]   r3   conv_kernel_size)shapelayers)sglang.srt.layers.dp_attentionr\   r   creater'   r   rY   )rA   r\   ra   r/   r/   rE   mamba2_cache_params   s   z$KimiLinearConfig.mamba2_cache_params)&r   r   r	   Nr
   r   r   Nr   r   r   Tr   r   r   r   NFNTr   NNr   r   r   r   Tr   r   NNNNNFr   N)__name__
__module____qualname__r0   keys_to_ignore_at_inferenceintboolstrfloatrK   r@   propertyrG   rH   rM   rO   rY   r[   r   re   __classcell__r/   r/   rC   rE   r      s     !"#$%&'c





r   N) transformers.configuration_utilsr   sglang.srt.configs.mamba_utilsr   r   r   r/   r/   r/   rE   <module>   s   