o
    پi(
                     @   s   d Z ddlmZmZ ddlmZmZ dedefddZ	dedefdd	Z
dedefd
dZeG dd deZeG dd deZdS )z=Qwen3 text encoder configuration for SGLang diffusion models.    )	dataclassfield)TextEncoderArchConfigTextEncoderConfignreturnc                 C   s   d| v ot | dd S )Nlayers.)strisdigitsplitr   m r   g/home/ubuntu/.local/lib/python3.10/site-packages/sglang/multimodal_gen/configs/models/encoders/qwen3.py_is_transformer_layer   s   r   c                 C   
   |  dS )Nembed_tokensendswithr   r   r   r   _is_embeddings      
r   c                 C   r   )Nnormr   r   r   r   r   _is_final_norm   r   r   c                   @   sn  e Zd ZU dZdZeed< dZeed< dZeed< dZ	eed	< d
Z
eed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< dZeed< d Zed B ed!< d"Zeed#< d$Zeed%< d"Zeed&< d'Zeed(< d)Zeed*< dZ eed+< e!d,d- d.Z"e#e$eeef  ed/< e!d0d- d.Z%e#ed1< d5d3d4Z&d S )6Qwen3TextArchConfigzArchitecture config for Qwen3 text encoder.

    Qwen3 is similar to LLaMA but with QK-Norm (RMSNorm on Q and K before attention).
    iQ 
vocab_sizei 
  hidden_sizei &  intermediate_size$   num_hidden_layers    num_attention_heads   num_key_value_headssilu
hidden_acti   max_position_embeddingsg{Gz?initializer_rangegư>rms_norm_epsT	use_cachei[P pad_token_idbos_token_idi]P eos_token_idtie_word_embeddingsg    .A
rope_thetaNrope_scalingFattention_biasg        attention_dropoutmlp_bias   head_dimi   text_lenoutput_hidden_statesc                   C   s   g dS )N))	.qkv_projz.q_projq)r8   z.k_projk)r8   z.v_projv).gate_up_projz
.gate_projr   )r<   z.up_proj   r   r   r   r   r   <lambda>9   s    zQwen3TextArchConfig.<lambda>default_factorystacked_params_mappingc                   C   s
   t ttgS )N)r   r   r   r   r   r   r   r>   E   s   
 _fsdp_shard_conditionsr   c                 C   s   dd| j dd| _d S )N
max_lengthTpt)padding
truncationrC   return_tensors)r6   tokenizer_kwargs)selfr   r   r   __post_init__H   s
   z!Qwen3TextArchConfig.__post_init__)r   N)'__name__
__module____qualname____doc__r   int__annotations__r   r   r    r"   r$   r&   r   r'   r(   floatr)   r*   boolr+   r,   r-   r.   r/   r0   dictr1   r2   r3   r5   r6   r7   r   rA   listtuplerB   rJ   r   r   r   r   r      s@   
 r   c                   @   s0   e Zd ZU dZeedZeed< dZ	e
ed< dS )Qwen3TextConfigz(Top-level config for Qwen3 text encoder.r?   arch_configqwen3prefixN)rK   rL   rM   rN   r   r   rW   r   rP   rY   r   r   r   r   r   rV   Q   s   
 rV   N)rN   dataclassesr   r   2sglang.multimodal_gen.configs.models.encoders.baser   r   r   rR   r   r   r   r   rV   r   r   r   r   <module>   s   8