o
    پi                     @   sd   d Z ddlZddlmZ ddlmZ ddlmZmZ e	e
ZG dd dejZG dd	 d	eZdS )
z!BailingHybrid model configuration    N)PretrainedConfig)logging)Mamba2CacheParamsMamba2StateShapec                   @   s   e Zd ZdZdZdS )HybridLayerType	attentionlinear_attentionN)__name__
__module____qualname__full_attentionr    r   r   U/home/ubuntu/.local/lib/python3.10/site-packages/sglang/srt/configs/bailing_hybrid.pyr      s    r   c                       s   e Zd ZdZdgZ									
	
		
																				
								
						d& fdd	Zedd Zedd  Zed!d" Z	ed#e
fd$d%Z  ZS )'BailingHybridConfigbailing_hybridpast_key_values f                siluFư>        {Gz?       O"ATNd                r         ?@   c.           /         s<  || _ || _|| _|| _|| _|| _|| _|| _|	| _|
| _	|| _
|| _|| _|!| _|"| _|| _|| _|| _|| _|| _|pC| j| j | _|| _| | _|#| _|$| _|| _|| _|| _|| _|| _|| _|| _|| _ |%| _!|&| _"|'| _#|| _$|(| _%|)| _&|*| _'|+| _(|,| _)|,|* | _*|-| _+d| _,t- j.d|||d|. d S )NF)pad_token_ideos_token_idtie_word_embeddingsr   )/num_hidden_layers
vocab_sizehidden_sizeintermediate_sizenum_attention_headsnum_key_value_heads
hidden_actuse_qkv_biasuse_biasrms_norm_epsembedding_dropoutattention_dropoutoutput_dropoutnum_nextn_predict_layersmtp_loss_scaling_factorinitializer_rangemax_position_embeddings
rope_theta	use_cachemax_window_layershead_dimrope_scalinguse_qk_normmoe_router_enable_expert_biasrouted_scaling_factornum_expertsnum_shared_expertsnum_experts_per_tokn_group
topk_groupmoe_intermediate_sizefirst_k_dense_replaceoutput_router_logitslayer_group_sizegroup_norm_sizelinear_silunum_linear_key_value_headskv_lora_rankq_lora_rankqk_rope_head_dim
v_head_dimqk_nope_head_dimqk_head_dimrope_interleavefor_nextn_modelsuper__init__)/selfr*   r+   r,   r)   r-   r.   r/   r0   r1   r2   r(   r3   r4   r5   r8   r9   r:   r;   r<   r>   r&   r'   rB   rC   rD   rE   rF   rG   rH   r=   rI   r?   r6   r7   r@   rA   rJ   rK   rL   rN   rO   rP   rQ   rR   rT   kwargs	__class__r   r   rW   %   sh   1

zBailingHybridConfig.__init__c                 C   sX   | j rtjjgS g }t| jD ]}|d | j dkr"|tjj q|tjj q|S )Nr    r   )	rU   r   r   valueranger)   rJ   appendr   )rX   layer_type_listlr   r   r   layers_block_type   s   
z%BailingHybridConfig.layers_block_typec                 C      dd t | jD S )Nc                 S       g | ]\}}|t jjkr|qS r   )r   r   r\   .0i
type_valuer   r   r   
<listcomp>   
    z8BailingHybridConfig.linear_layer_ids.<locals>.<listcomp>	enumeratera   rX   r   r   r   linear_layer_ids      z$BailingHybridConfig.linear_layer_idsc                 C   rb   )Nc                 S   rc   r   )r   r   r\   rd   r   r   r   rh      ri   z@BailingHybridConfig.full_attention_layer_ids.<locals>.<listcomp>rj   rl   r   r   r   full_attention_layer_ids   rn   z,BailingHybridConfig.full_attention_layer_idsreturnc              	   C   s:   ddl m} tj| dd| j| j| jdd}t|| jdS )Nr   )get_attention_tp_sizer    )tp_world_sizer,   n_groups	num_headsr=   
state_sizeconv_kernel)shapelayers)sglang.srt.layers.dp_attentionrq   r   createrM   r=   r   rm   )rX   rq   rw   r   r   r   mamba2_cache_params   s   
z'BailingHybridConfig.mamba2_cache_params)-r   r   r   r   r   r   r   FFr   Fr   r   r   r   r   r   Tr   Nr   r   r   r    r!   r!   r   r"   r    r#   FTr   r   Tr$   r    r    Fr"   Nr%   r#   r#   T)r	   r
   r   
model_typekeys_to_ignore_at_inferencerW   propertyra   rm   ro   r   r{   __classcell__r   r   rZ   r   r       sr    j


r   )__doc__enum transformers.configuration_utilsr   transformers.utilsr   sglang.srt.configs.mamba_utilsr   r   
get_loggerr	   loggerEnumr   r   r   r   r   r   <module>   s   
