o
    
۾i                     @   s4   d dl mZ d dlmZ eeZG dd deZdS )    )PretrainedConfig)init_loggerc                *       s$  e Zd ZdZdgZ												
																										d2dedB dedededB dedB dededededededededB dedB d edB d!edB d"edB d#edB d$ed%e	dB f( fd&d'Z
ed(d) Zed*d+ Zed,efd-d.Zd/efd0d1Z  ZS )3KimiLinearConfigkimi_linearpast_key_values     N +      silu{Gz?ư>Tr         Fsigmoid      ?moe_intermediate_sizemoe_renormalizemoe_router_activation_funcnum_expertsnum_experts_per_tokennum_shared_expertsrouted_scaling_factorfirst_k_dense_replacemoe_layer_frequse_grouped_topknum_expert_group
topk_groupq_lora_rankkv_lora_rankqk_nope_head_dimqk_rope_head_dim
v_head_dimmla_use_nopenum_nextn_predict_layerslinear_attn_configc&           )         sp  || _ || _|| _|d ur|n|| | _|| _|| _|| _|d u r#|}|| _|	| _|
| _	|| _
|| _|&dd }'|'p?|p?ddi}|&dd}(d|vrN|(|d< || _|| _|| _| | _|!| _|"| _|#| _|| _|| _|| _|| _|| _|| _| jdv s|J || _|| _|| _|| _|| _|| _|$| _ |%d ur|%d d usJ |%d d usJ |%| _!t" j#d
||||d	|& d S )Nrope_scaling	rope_typedefault
rope_thetag     @)softmaxr   
kda_layersfull_attn_layers)pad_token_idbos_token_ideos_token_idtie_word_embeddings )$
model_type
vocab_sizehidden_sizehead_dimintermediate_sizenum_hidden_layersnum_attention_headsnum_key_value_heads
hidden_actinitializer_rangerms_norm_eps	use_cachepoprope_parametersr   r   r    r!   r"   r#   r   r   r   r   r   r   r   r   r   r   r   r   r$   r%   super__init__))selfr2   r3   r4   r5   r6   r7   r8   r9   r:   r;   r<   r=   r-   r.   r/   r?   r0   r   r   r   r   r   r   r   r   r   r   r   r   r   r   r    r!   r"   r#   r$   r%   kwargsr&   r)   	__class__r1   _/home/ubuntu/.local/lib/python3.10/site-packages/vllm/transformers_utils/configs/kimi_linear.pyrA      sj   )
zKimiLinearConfig.__init__c                 C   s<   | j d up| jd up| jd up| jd up| jd up| jdu S )NT)r   r   r    r!   r"   r#   rB   r1   r1   rF   is_mlav   s   
zKimiLinearConfig.is_mlac                 C   s
   | j d uS )N)r   rG   r1   r1   rF   is_moe   s   
zKimiLinearConfig.is_moereturnc                 C   s8   | j d u pt| j to| j d d uot| j d dk S )Nr+   r   )r%   
isinstancedictlenrG   r1   r1   rF   is_linear_attn   s   
zKimiLinearConfig.is_linear_attn	layer_idxc                 C   s   | j d uo|d | j d v S )Nr   r+   )r%   )rB   rO   r1   r1   rF   is_kda_layer   s   
zKimiLinearConfig.is_kda_layer)%r   r   r   Nr	   r
   r
   Nr   r   r   Tr   r   r   NFNTr   NNr   r   r   r   Tr   r   NNNNNFr   N)__name__
__module____qualname__r2   keys_to_ignore_at_inferenceintboolstrfloatrL   rA   propertyrH   rI   rN   rP   __classcell__r1   r1   rD   rF   r      s     !"#$%&g



r   N) transformers.configuration_utilsr   vllm.loggerr   rQ   loggerr   r1   r1   r1   rF   <module>   s   